diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1741,13 +1741,28 @@ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) && "If SETO is expanded, SETOEQ must be legal!"); CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; + case ISD::SETONE: + case ISD::SETUEQ: + // If the SETUO or SETO CC isn't legal, we might be able to use + // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one + // of SETOGT/SETOLT to be legal, the other can be emulated by swapping + // the operands. + CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO; + if (!TLI.isCondCodeLegal(CC2, OpVT) && + (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) || + TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) { + CC1 = ISD::SETOGT; + CC2 = ISD::SETOLT; + Opc = ISD::OR; + NeedInvert = ((unsigned)CCCode & 0x8U); + break; + } + LLVM_FALLTHROUGH; case ISD::SETOEQ: case ISD::SETOGT: case ISD::SETOGE: case ISD::SETOLT: case ISD::SETOLE: - case ISD::SETONE: - case ISD::SETUEQ: case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE: diff --git a/llvm/test/CodeGen/AMDGPU/setcc.ll b/llvm/test/CodeGen/AMDGPU/setcc.ll --- a/llvm/test/CodeGen/AMDGPU/setcc.ll +++ b/llvm/test/CodeGen/AMDGPU/setcc.ll @@ -96,11 +96,9 @@ } ; FUNC-LABEL: {{^}}f32_one: -; R600-DAG: SETE_DX10 -; R600-DAG: SETE_DX10 -; R600-DAG: AND_INT -; R600-DAG: SETNE_DX10 -; R600-DAG: AND_INT +; R600-DAG: SETGT_DX10 +; R600-DAG: SETGT_DX10 +; R600-DAG: OR_INT ; R600-DAG: SETNE_INT ; GCN: v_cmp_lg_f32_e32 vcc @@ -128,12 +126,10 @@ } ; FUNC-LABEL: {{^}}f32_ueq: -; R600-DAG: SETNE_DX10 -; R600-DAG: SETNE_DX10 -; R600-DAG: OR_INT -; R600-DAG: SETE_DX10 +; R600-DAG: SETGT_DX10 +; R600-DAG: SETGT_DX10 ; R600-DAG: OR_INT -; R600-DAG: SETNE_INT +; R600-DAG: SETE_INT ; GCN: v_cmp_nlg_f32_e32 vcc ; GCN-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll --- a/llvm/test/CodeGen/PowerPC/spe.ll +++ b/llvm/test/CodeGen/PowerPC/spe.ll @@ -297,12 +297,10 @@ define i1 @test_fcmpueq(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpueq: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: efscmpeq 0, 3, 3 -; CHECK-NEXT: efscmpeq 1, 4, 4 -; CHECK-NEXT: crnand 20, 5, 1 -; CHECK-NEXT: efscmpeq 0, 3, 4 +; CHECK-NEXT: efscmpgt 0, 3, 4 +; CHECK-NEXT: efscmplt 1, 3, 4 ; CHECK-NEXT: li 5, 1 -; CHECK-NEXT: crnor 20, 1, 20 +; CHECK-NEXT: cror 20, 5, 1 ; CHECK-NEXT: bc 12, 20, .LBB14_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 @@ -318,12 +316,10 @@ define i1 @test_fcmpne(float %a, float %b) #0 { ; CHECK-LABEL: test_fcmpne: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: efscmpeq 0, 4, 4 -; CHECK-NEXT: efscmpeq 1, 3, 3 -; CHECK-NEXT: crand 20, 5, 1 -; CHECK-NEXT: efscmpeq 0, 3, 4 +; CHECK-NEXT: efscmplt 0, 3, 4 +; CHECK-NEXT: efscmpgt 1, 3, 4 ; CHECK-NEXT: li 5, 1 -; CHECK-NEXT: crorc 20, 1, 20 +; CHECK-NEXT: crnor 20, 5, 1 ; CHECK-NEXT: bc 12, 20, .LBB15_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 @@ -1117,22 +1113,19 @@ ; SPE-LABEL: test_dcmpueq: ; SPE: # %bb.0: # %entry ; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: evmergelo 5, 5, 6 ; SPE-NEXT: evmergelo 3, 3, 4 -; SPE-NEXT: evmergelo 4, 5, 6 -; SPE-NEXT: efdcmpeq 0, 4, 4 -; SPE-NEXT: bc 4, 1, .LBB16_4 +; SPE-NEXT: efdcmplt 0, 3, 5 +; SPE-NEXT: bc 12, 1, .LBB16_3 ; SPE-NEXT: # %bb.1: # %entry -; SPE-NEXT: efdcmpeq 0, 3, 3 -; SPE-NEXT: bc 4, 1, .LBB16_4 -; SPE-NEXT: # %bb.2: # %entry -; SPE-NEXT: efdcmpeq 
0, 3, 4 -; SPE-NEXT: bc 12, 1, .LBB16_4 -; SPE-NEXT: # %bb.3: # %fa -; SPE-NEXT: li 3, 0 -; SPE-NEXT: b .LBB16_5 -; SPE-NEXT: .LBB16_4: # %tr +; SPE-NEXT: efdcmpgt 0, 3, 5 +; SPE-NEXT: bc 12, 1, .LBB16_3 +; SPE-NEXT: # %bb.2: # %tr ; SPE-NEXT: li 3, 1 -; SPE-NEXT: .LBB16_5: # %ret +; SPE-NEXT: b .LBB16_4 +; SPE-NEXT: .LBB16_3: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB16_4: # %ret ; SPE-NEXT: stw 3, 12(1) ; SPE-NEXT: lwz 3, 12(1) ; SPE-NEXT: addi 1, 1, 16 @@ -1208,14 +1201,12 @@ define i1 @test_dcmpne(double %a, double %b) #0 { ; SPE-LABEL: test_dcmpne: ; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 5, 5, 6 ; SPE-NEXT: evmergelo 3, 3, 4 -; SPE-NEXT: evmergelo 4, 5, 6 ; SPE-NEXT: li 7, 1 -; SPE-NEXT: efdcmpeq 0, 4, 4 -; SPE-NEXT: efdcmpeq 1, 3, 3 -; SPE-NEXT: efdcmpeq 5, 3, 4 -; SPE-NEXT: crand 24, 5, 1 -; SPE-NEXT: crorc 20, 21, 24 +; SPE-NEXT: efdcmplt 0, 3, 5 +; SPE-NEXT: efdcmpgt 1, 3, 5 +; SPE-NEXT: crnor 20, 5, 1 ; SPE-NEXT: bc 12, 20, .LBB17_2 ; SPE-NEXT: # %bb.1: # %entry ; SPE-NEXT: ori 3, 7, 0 diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -624,49 +624,33 @@ define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) { ; CHECK-LABEL: test22: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvcmpeqsp vs0, v5, v5 -; CHECK-NEXT: xvcmpeqsp vs1, v4, v4 -; CHECK-NEXT: xvcmpeqsp vs2, v4, v5 -; CHECK-NEXT: xxlnor vs0, vs0, vs0 -; CHECK-NEXT: xxlnor vs1, vs1, vs1 -; CHECK-NEXT: xxlor vs0, vs1, vs0 -; CHECK-NEXT: xxlor vs0, vs2, vs0 +; CHECK-NEXT: xvcmpgtsp vs0, v5, v4 +; CHECK-NEXT: xvcmpgtsp vs1, v4, v5 +; CHECK-NEXT: xxlnor vs0, vs1, vs0 ; CHECK-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test22: ; CHECK-REG: # %bb.0: # %entry -; CHECK-REG-NEXT: xvcmpeqsp vs0, v5, v5 -; CHECK-REG-NEXT: xvcmpeqsp vs1, v4, v4 -; CHECK-REG-NEXT: xvcmpeqsp vs2, v4, v5 -; CHECK-REG-NEXT: xxlnor vs0, vs0, vs0 -; CHECK-REG-NEXT: xxlnor vs1, vs1, vs1 -; CHECK-REG-NEXT: xxlor vs0, vs1, vs0 -; CHECK-REG-NEXT: xxlor vs0, vs2, vs0 +; CHECK-REG-NEXT: xvcmpgtsp vs0, v5, v4 +; CHECK-REG-NEXT: xvcmpgtsp vs1, v4, v5 +; CHECK-REG-NEXT: xxlnor vs0, vs1, vs0 ; CHECK-REG-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test22: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xvcmpeqsp vs0, v4, v5 -; CHECK-FISL-NEXT: xvcmpeqsp vs1, v5, v5 -; CHECK-FISL-NEXT: xxlnor vs2, vs1, vs1 -; CHECK-FISL-NEXT: xvcmpeqsp vs1, v4, v4 -; CHECK-FISL-NEXT: xxlnor vs1, vs1, vs1 -; CHECK-FISL-NEXT: xxlor vs1, vs1, vs2 -; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1 +; CHECK-FISL-NEXT: xvcmpgtsp vs1, v5, v4 +; CHECK-FISL-NEXT: xvcmpgtsp vs0, v4, v5 +; CHECK-FISL-NEXT: xxlnor vs0, vs0, vs1 ; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test22: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xvcmpeqsp vs0, v5, v5 -; CHECK-LE-NEXT: xvcmpeqsp vs1, v4, v4 -; CHECK-LE-NEXT: xvcmpeqsp vs2, v4, v5 -; CHECK-LE-NEXT: xxlnor vs0, vs0, vs0 -; CHECK-LE-NEXT: xxlnor vs1, vs1, vs1 -; CHECK-LE-NEXT: xxlor vs0, vs1, vs0 -; CHECK-LE-NEXT: xxlor vs0, vs2, vs0 +; CHECK-LE-NEXT: xvcmpgtsp vs0, v5, v4 +; CHECK-LE-NEXT: xvcmpgtsp vs1, v4, v5 +; CHECK-LE-NEXT: xxlnor vs0, vs1, vs0 ; CHECK-LE-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-LE-NEXT: blr entry: diff --git a/llvm/test/CodeGen/RISCV/double-br-fcmp.ll b/llvm/test/CodeGen/RISCV/double-br-fcmp.ll --- a/llvm/test/CodeGen/RISCV/double-br-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/double-br-fcmp.ll @@ -305,24 
+305,20 @@ unreachable } -; TODO: feq.s+sltiu+bne -> feq.s+beq define void @br_fcmp_one(double %a, double %b) nounwind { ; RV32IFD-LABEL: br_fcmp_one: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw a0, 0(sp) -; RV32IFD-NEXT: sw a1, 4(sp) -; RV32IFD-NEXT: fld ft0, 0(sp) ; RV32IFD-NEXT: sw a2, 0(sp) ; RV32IFD-NEXT: sw a3, 4(sp) +; RV32IFD-NEXT: fld ft0, 0(sp) +; RV32IFD-NEXT: sw a0, 0(sp) +; RV32IFD-NEXT: sw a1, 4(sp) ; RV32IFD-NEXT: fld ft1, 0(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft1 -; RV32IFD-NEXT: feq.d a1, ft0, ft0 -; RV32IFD-NEXT: and a0, a1, a0 -; RV32IFD-NEXT: feq.d a1, ft0, ft1 -; RV32IFD-NEXT: not a1, a1 -; RV32IFD-NEXT: and a0, a1, a0 +; RV32IFD-NEXT: flt.d a0, ft1, ft0 +; RV32IFD-NEXT: flt.d a1, ft0, ft1 +; RV32IFD-NEXT: or a0, a1, a0 ; RV32IFD-NEXT: bnez a0, .LBB7_2 ; RV32IFD-NEXT: # %bb.1: # %if.else ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -335,14 +331,11 @@ ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: addi sp, sp, -16 ; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: fmv.d.x ft1, a1 -; RV64IFD-NEXT: feq.d a0, ft1, ft1 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: and a0, a1, a0 -; RV64IFD-NEXT: feq.d a1, ft0, ft1 -; RV64IFD-NEXT: not a1, a1 -; RV64IFD-NEXT: and a0, a1, a0 +; RV64IFD-NEXT: fmv.d.x ft0, a1 +; RV64IFD-NEXT: fmv.d.x ft1, a0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: flt.d a1, ft0, ft1 +; RV64IFD-NEXT: or a0, a1, a0 ; RV64IFD-NEXT: bnez a0, .LBB7_2 ; RV64IFD-NEXT: # %bb.1: # %if.else ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -417,13 +410,11 @@ ; RV32IFD-NEXT: sw a0, 0(sp) ; RV32IFD-NEXT: sw a1, 4(sp) ; RV32IFD-NEXT: fld ft1, 0(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft0 -; RV32IFD-NEXT: feq.d a1, ft0, ft0 -; RV32IFD-NEXT: feq.d a2, ft1, ft1 -; RV32IFD-NEXT: and a1, a2, a1 -; RV32IFD-NEXT: xori a1, a1, 1 -; RV32IFD-NEXT: or a0, a0, a1 -; RV32IFD-NEXT: bnez a0, .LBB9_2 +; RV32IFD-NEXT: flt.d a0, ft1, ft0 +; RV32IFD-NEXT: flt.d a1, ft0, ft1 +; RV32IFD-NEXT: or a0, a1, a0 +; RV32IFD-NEXT: addi a1, zero, 1 +; RV32IFD-NEXT: bne a0, a1, .LBB9_2 ; RV32IFD-NEXT: # %bb.1: # %if.else ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 16 @@ -437,13 +428,11 @@ ; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IFD-NEXT: fmv.d.x ft0, a1 ; RV64IFD-NEXT: fmv.d.x ft1, a0 -; RV64IFD-NEXT: feq.d a0, ft1, ft0 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: feq.d a2, ft1, ft1 -; RV64IFD-NEXT: and a1, a2, a1 -; RV64IFD-NEXT: xori a1, a1, 1 -; RV64IFD-NEXT: or a0, a0, a1 -; RV64IFD-NEXT: bnez a0, .LBB9_2 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: flt.d a1, ft0, ft1 +; RV64IFD-NEXT: or a0, a1, a0 +; RV64IFD-NEXT: addi a1, zero, 1 +; RV64IFD-NEXT: bne a0, a1, .LBB9_2 ; RV64IFD-NEXT: # %bb.1: # %if.else ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/double-fcmp.ll b/llvm/test/CodeGen/RISCV/double-fcmp.ll --- a/llvm/test/CodeGen/RISCV/double-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/double-fcmp.ll @@ -148,31 +148,25 @@ ; RV32IFD-LABEL: fcmp_one: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw a0, 8(sp) -; RV32IFD-NEXT: sw a1, 12(sp) -; RV32IFD-NEXT: fld ft0, 8(sp) ; RV32IFD-NEXT: sw a2, 8(sp) ; RV32IFD-NEXT: sw a3, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: fld ft1, 8(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft1 -; 
RV32IFD-NEXT: feq.d a1, ft0, ft0 -; RV32IFD-NEXT: and a0, a1, a0 -; RV32IFD-NEXT: feq.d a1, ft0, ft1 -; RV32IFD-NEXT: not a1, a1 -; RV32IFD-NEXT: and a0, a1, a0 +; RV32IFD-NEXT: flt.d a0, ft1, ft0 +; RV32IFD-NEXT: flt.d a1, ft0, ft1 +; RV32IFD-NEXT: or a0, a1, a0 ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fcmp_one: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: fmv.d.x ft1, a1 -; RV64IFD-NEXT: feq.d a0, ft1, ft1 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: and a0, a1, a0 -; RV64IFD-NEXT: feq.d a1, ft0, ft1 -; RV64IFD-NEXT: not a1, a1 -; RV64IFD-NEXT: and a0, a1, a0 +; RV64IFD-NEXT: fmv.d.x ft0, a1 +; RV64IFD-NEXT: fmv.d.x ft1, a0 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: flt.d a1, ft0, ft1 +; RV64IFD-NEXT: or a0, a1, a0 ; RV64IFD-NEXT: ret %1 = fcmp one double %a, %b %2 = zext i1 %1 to i32 @@ -218,12 +212,10 @@ ; RV32IFD-NEXT: sw a0, 8(sp) ; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: fld ft1, 8(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft0 -; RV32IFD-NEXT: feq.d a1, ft0, ft0 -; RV32IFD-NEXT: feq.d a2, ft1, ft1 -; RV32IFD-NEXT: and a1, a2, a1 -; RV32IFD-NEXT: xori a1, a1, 1 -; RV32IFD-NEXT: or a0, a0, a1 +; RV32IFD-NEXT: flt.d a0, ft1, ft0 +; RV32IFD-NEXT: flt.d a1, ft0, ft1 +; RV32IFD-NEXT: or a0, a1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; @@ -231,12 +223,10 @@ ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: fmv.d.x ft0, a1 ; RV64IFD-NEXT: fmv.d.x ft1, a0 -; RV64IFD-NEXT: feq.d a0, ft1, ft0 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: feq.d a2, ft1, ft1 -; RV64IFD-NEXT: and a1, a2, a1 -; RV64IFD-NEXT: xori a1, a1, 1 -; RV64IFD-NEXT: or a0, a0, a1 +; RV64IFD-NEXT: flt.d a0, ft1, ft0 +; RV64IFD-NEXT: flt.d a1, ft0, ft1 +; RV64IFD-NEXT: or a0, a1, a0 +; RV64IFD-NEXT: xori a0, a0, 1 ; RV64IFD-NEXT: ret %1 = fcmp ueq double %a, %b %2 = zext i1 %1 to i32 diff --git a/llvm/test/CodeGen/RISCV/double-select-fcmp.ll b/llvm/test/CodeGen/RISCV/double-select-fcmp.ll --- a/llvm/test/CodeGen/RISCV/double-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/double-select-fcmp.ll @@ -206,27 +206,23 @@ } define double @select_fcmp_one(double %a, double %b) nounwind { -; TODO: feq.s+sltiu+bne sequence could be optimised ; RV32IFD-LABEL: select_fcmp_one: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw a0, 8(sp) -; RV32IFD-NEXT: sw a1, 12(sp) -; RV32IFD-NEXT: fld ft0, 8(sp) ; RV32IFD-NEXT: sw a2, 8(sp) ; RV32IFD-NEXT: sw a3, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: fld ft1, 8(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft1 -; RV32IFD-NEXT: feq.d a1, ft0, ft0 -; RV32IFD-NEXT: and a0, a1, a0 -; RV32IFD-NEXT: feq.d a1, ft0, ft1 -; RV32IFD-NEXT: not a1, a1 -; RV32IFD-NEXT: and a0, a1, a0 +; RV32IFD-NEXT: flt.d a0, ft1, ft0 +; RV32IFD-NEXT: flt.d a1, ft0, ft1 +; RV32IFD-NEXT: or a0, a1, a0 ; RV32IFD-NEXT: bnez a0, .LBB6_2 ; RV32IFD-NEXT: # %bb.1: -; RV32IFD-NEXT: fmv.d ft0, ft1 +; RV32IFD-NEXT: fmv.d ft1, ft0 ; RV32IFD-NEXT: .LBB6_2: -; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: fsd ft1, 8(sp) ; RV32IFD-NEXT: lw a0, 8(sp) ; RV32IFD-NEXT: lw a1, 12(sp) ; RV32IFD-NEXT: addi sp, sp, 16 @@ -234,14 +230,11 @@ ; ; RV64IFD-LABEL: select_fcmp_one: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fmv.d.x ft0, a0 ; RV64IFD-NEXT: fmv.d.x ft1, a1 -; RV64IFD-NEXT: feq.d a0, ft1, ft1 -; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: and a0, a1, a0 -; RV64IFD-NEXT: feq.d a1, ft0, ft1 -; RV64IFD-NEXT: not a1, a1 -; RV64IFD-NEXT: and a0, a1, a0 
+; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: flt.d a0, ft0, ft1 +; RV64IFD-NEXT: flt.d a1, ft1, ft0 +; RV64IFD-NEXT: or a0, a1, a0 ; RV64IFD-NEXT: bnez a0, .LBB6_2 ; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fmv.d ft0, ft1 @@ -304,12 +297,10 @@ ; RV32IFD-NEXT: sw a0, 8(sp) ; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: fld ft1, 8(sp) -; RV32IFD-NEXT: feq.d a0, ft1, ft0 -; RV32IFD-NEXT: feq.d a1, ft0, ft0 -; RV32IFD-NEXT: feq.d a2, ft1, ft1 -; RV32IFD-NEXT: and a1, a2, a1 -; RV32IFD-NEXT: xori a1, a1, 1 -; RV32IFD-NEXT: or a0, a0, a1 +; RV32IFD-NEXT: flt.d a0, ft1, ft0 +; RV32IFD-NEXT: flt.d a1, ft0, ft1 +; RV32IFD-NEXT: or a0, a1, a0 +; RV32IFD-NEXT: xori a0, a0, 1 ; RV32IFD-NEXT: bnez a0, .LBB8_2 ; RV32IFD-NEXT: # %bb.1: ; RV32IFD-NEXT: fmv.d ft1, ft0 @@ -324,12 +315,10 @@ ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: fmv.d.x ft1, a1 ; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: feq.d a0, ft0, ft1 -; RV64IFD-NEXT: feq.d a1, ft1, ft1 -; RV64IFD-NEXT: feq.d a2, ft0, ft0 -; RV64IFD-NEXT: and a1, a2, a1 -; RV64IFD-NEXT: xori a1, a1, 1 -; RV64IFD-NEXT: or a0, a0, a1 +; RV64IFD-NEXT: flt.d a0, ft0, ft1 +; RV64IFD-NEXT: flt.d a1, ft1, ft0 +; RV64IFD-NEXT: or a0, a1, a0 +; RV64IFD-NEXT: xori a0, a0, 1 ; RV64IFD-NEXT: bnez a0, .LBB8_2 ; RV64IFD-NEXT: # %bb.1: ; RV64IFD-NEXT: fmv.d ft0, ft1 diff --git a/llvm/test/CodeGen/RISCV/float-br-fcmp.ll b/llvm/test/CodeGen/RISCV/float-br-fcmp.ll --- a/llvm/test/CodeGen/RISCV/float-br-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/float-br-fcmp.ll @@ -282,20 +282,16 @@ unreachable } -; TODO: feq.s+sltiu+bne -> feq.s+beq define void @br_fcmp_one(float %a, float %b) nounwind { ; RV32IF-LABEL: br_fcmp_one: ; RV32IF: # %bb.0: ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: fmv.w.x ft1, a1 -; RV32IF-NEXT: feq.s a0, ft1, ft1 -; RV32IF-NEXT: feq.s a1, ft0, ft0 -; RV32IF-NEXT: and a0, a1, a0 -; RV32IF-NEXT: feq.s a1, ft0, ft1 -; RV32IF-NEXT: not a1, a1 -; RV32IF-NEXT: and a0, a1, a0 +; RV32IF-NEXT: fmv.w.x ft0, a1 +; RV32IF-NEXT: fmv.w.x ft1, a0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: flt.s a1, ft0, ft1 +; RV32IF-NEXT: or a0, a1, a0 ; RV32IF-NEXT: bnez a0, .LBB7_2 ; RV32IF-NEXT: # %bb.1: # %if.else ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -308,14 +304,11 @@ ; RV64IF: # %bb.0: ; RV64IF-NEXT: addi sp, sp, -16 ; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fmv.w.x ft1, a1 -; RV64IF-NEXT: feq.s a0, ft1, ft1 -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: and a0, a1, a0 -; RV64IF-NEXT: feq.s a1, ft0, ft1 -; RV64IF-NEXT: not a1, a1 -; RV64IF-NEXT: and a0, a1, a0 +; RV64IF-NEXT: fmv.w.x ft0, a1 +; RV64IF-NEXT: fmv.w.x ft1, a0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: flt.s a1, ft0, ft1 +; RV64IF-NEXT: or a0, a1, a0 ; RV64IF-NEXT: bnez a0, .LBB7_2 ; RV64IF-NEXT: # %bb.1: # %if.else ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -382,13 +375,11 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.w.x ft0, a1 ; RV32IF-NEXT: fmv.w.x ft1, a0 -; RV32IF-NEXT: feq.s a0, ft1, ft0 -; RV32IF-NEXT: feq.s a1, ft0, ft0 -; RV32IF-NEXT: feq.s a2, ft1, ft1 -; RV32IF-NEXT: and a1, a2, a1 -; RV32IF-NEXT: xori a1, a1, 1 -; RV32IF-NEXT: or a0, a0, a1 -; RV32IF-NEXT: bnez a0, .LBB9_2 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: flt.s a1, ft0, ft1 +; RV32IF-NEXT: or a0, a1, a0 +; RV32IF-NEXT: addi a1, zero, 1 +; RV32IF-NEXT: bne a0, a1, .LBB9_2 ; RV32IF-NEXT: # %bb.1: # %if.else ; RV32IF-NEXT: lw ra, 12(sp) # 
4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -402,13 +393,11 @@ ; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IF-NEXT: fmv.w.x ft0, a1 ; RV64IF-NEXT: fmv.w.x ft1, a0 -; RV64IF-NEXT: feq.s a0, ft1, ft0 -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: feq.s a2, ft1, ft1 -; RV64IF-NEXT: and a1, a2, a1 -; RV64IF-NEXT: xori a1, a1, 1 -; RV64IF-NEXT: or a0, a0, a1 -; RV64IF-NEXT: bnez a0, .LBB9_2 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: flt.s a1, ft0, ft1 +; RV64IF-NEXT: or a0, a1, a0 +; RV64IF-NEXT: addi a1, zero, 1 +; RV64IF-NEXT: bne a0, a1, .LBB9_2 ; RV64IF-NEXT: # %bb.1: # %if.else ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/float-fcmp.ll b/llvm/test/CodeGen/RISCV/float-fcmp.ll --- a/llvm/test/CodeGen/RISCV/float-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/float-fcmp.ll @@ -117,26 +117,20 @@ define i32 @fcmp_one(float %a, float %b) nounwind { ; RV32IF-LABEL: fcmp_one: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: fmv.w.x ft1, a1 -; RV32IF-NEXT: feq.s a0, ft1, ft1 -; RV32IF-NEXT: feq.s a1, ft0, ft0 -; RV32IF-NEXT: and a0, a1, a0 -; RV32IF-NEXT: feq.s a1, ft0, ft1 -; RV32IF-NEXT: not a1, a1 -; RV32IF-NEXT: and a0, a1, a0 +; RV32IF-NEXT: fmv.w.x ft0, a1 +; RV32IF-NEXT: fmv.w.x ft1, a0 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: flt.s a1, ft0, ft1 +; RV32IF-NEXT: or a0, a1, a0 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcmp_one: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: fmv.w.x ft1, a1 -; RV64IF-NEXT: feq.s a0, ft1, ft1 -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: and a0, a1, a0 -; RV64IF-NEXT: feq.s a1, ft0, ft1 -; RV64IF-NEXT: not a1, a1 -; RV64IF-NEXT: and a0, a1, a0 +; RV64IF-NEXT: fmv.w.x ft0, a1 +; RV64IF-NEXT: fmv.w.x ft1, a0 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: flt.s a1, ft0, ft1 +; RV64IF-NEXT: or a0, a1, a0 ; RV64IF-NEXT: ret %1 = fcmp one float %a, %b %2 = zext i1 %1 to i32 @@ -171,24 +165,20 @@ ; RV32IF: # %bb.0: ; RV32IF-NEXT: fmv.w.x ft0, a1 ; RV32IF-NEXT: fmv.w.x ft1, a0 -; RV32IF-NEXT: feq.s a0, ft1, ft0 -; RV32IF-NEXT: feq.s a1, ft0, ft0 -; RV32IF-NEXT: feq.s a2, ft1, ft1 -; RV32IF-NEXT: and a1, a2, a1 -; RV32IF-NEXT: xori a1, a1, 1 -; RV32IF-NEXT: or a0, a0, a1 +; RV32IF-NEXT: flt.s a0, ft1, ft0 +; RV32IF-NEXT: flt.s a1, ft0, ft1 +; RV32IF-NEXT: or a0, a1, a0 +; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcmp_ueq: ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft0, a1 ; RV64IF-NEXT: fmv.w.x ft1, a0 -; RV64IF-NEXT: feq.s a0, ft1, ft0 -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: feq.s a2, ft1, ft1 -; RV64IF-NEXT: and a1, a2, a1 -; RV64IF-NEXT: xori a1, a1, 1 -; RV64IF-NEXT: or a0, a0, a1 +; RV64IF-NEXT: flt.s a0, ft1, ft0 +; RV64IF-NEXT: flt.s a1, ft0, ft1 +; RV64IF-NEXT: or a0, a1, a0 +; RV64IF-NEXT: xori a0, a0, 1 ; RV64IF-NEXT: ret %1 = fcmp ueq float %a, %b %2 = zext i1 %1 to i32 diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll --- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll @@ -165,17 +165,13 @@ } define float @select_fcmp_one(float %a, float %b) nounwind { -; TODO: feq.s+sltiu+bne sequence could be optimised ; RV32IF-LABEL: select_fcmp_one: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fmv.w.x ft0, a0 ; RV32IF-NEXT: fmv.w.x ft1, a1 -; RV32IF-NEXT: feq.s a0, ft1, ft1 -; RV32IF-NEXT: feq.s a1, ft0, ft0 -; RV32IF-NEXT: and a0, a1, a0 -; RV32IF-NEXT: feq.s a1, ft0, ft1 -; RV32IF-NEXT: not a1, 
a1 -; RV32IF-NEXT: and a0, a1, a0 +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: flt.s a0, ft0, ft1 +; RV32IF-NEXT: flt.s a1, ft1, ft0 +; RV32IF-NEXT: or a0, a1, a0 ; RV32IF-NEXT: bnez a0, .LBB6_2 ; RV32IF-NEXT: # %bb.1: ; RV32IF-NEXT: fmv.s ft0, ft1 @@ -185,14 +181,11 @@ ; ; RV64IF-LABEL: select_fcmp_one: ; RV64IF: # %bb.0: -; RV64IF-NEXT: fmv.w.x ft0, a0 ; RV64IF-NEXT: fmv.w.x ft1, a1 -; RV64IF-NEXT: feq.s a0, ft1, ft1 -; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: and a0, a1, a0 -; RV64IF-NEXT: feq.s a1, ft0, ft1 -; RV64IF-NEXT: not a1, a1 -; RV64IF-NEXT: and a0, a1, a0 +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: flt.s a0, ft0, ft1 +; RV64IF-NEXT: flt.s a1, ft1, ft0 +; RV64IF-NEXT: or a0, a1, a0 ; RV64IF-NEXT: bnez a0, .LBB6_2 ; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fmv.s ft0, ft1 @@ -242,12 +235,10 @@ ; RV32IF: # %bb.0: ; RV32IF-NEXT: fmv.w.x ft1, a1 ; RV32IF-NEXT: fmv.w.x ft0, a0 -; RV32IF-NEXT: feq.s a0, ft0, ft1 -; RV32IF-NEXT: feq.s a1, ft1, ft1 -; RV32IF-NEXT: feq.s a2, ft0, ft0 -; RV32IF-NEXT: and a1, a2, a1 -; RV32IF-NEXT: xori a1, a1, 1 -; RV32IF-NEXT: or a0, a0, a1 +; RV32IF-NEXT: flt.s a0, ft0, ft1 +; RV32IF-NEXT: flt.s a1, ft1, ft0 +; RV32IF-NEXT: or a0, a1, a0 +; RV32IF-NEXT: xori a0, a0, 1 ; RV32IF-NEXT: bnez a0, .LBB8_2 ; RV32IF-NEXT: # %bb.1: ; RV32IF-NEXT: fmv.s ft0, ft1 @@ -259,12 +250,10 @@ ; RV64IF: # %bb.0: ; RV64IF-NEXT: fmv.w.x ft1, a1 ; RV64IF-NEXT: fmv.w.x ft0, a0 -; RV64IF-NEXT: feq.s a0, ft0, ft1 -; RV64IF-NEXT: feq.s a1, ft1, ft1 -; RV64IF-NEXT: feq.s a2, ft0, ft0 -; RV64IF-NEXT: and a1, a2, a1 -; RV64IF-NEXT: xori a1, a1, 1 -; RV64IF-NEXT: or a0, a0, a1 +; RV64IF-NEXT: flt.s a0, ft0, ft1 +; RV64IF-NEXT: flt.s a1, ft1, ft0 +; RV64IF-NEXT: or a0, a1, a0 +; RV64IF-NEXT: xori a0, a0, 1 ; RV64IF-NEXT: bnez a0, .LBB8_2 ; RV64IF-NEXT: # %bb.1: ; RV64IF-NEXT: fmv.s ft0, ft1 diff --git a/llvm/test/CodeGen/RISCV/half-br-fcmp.ll b/llvm/test/CodeGen/RISCV/half-br-fcmp.ll --- a/llvm/test/CodeGen/RISCV/half-br-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/half-br-fcmp.ll @@ -258,18 +258,14 @@ unreachable } -; TODO: feq.h+sltiu+bne -> feq.h+beq define void @br_fcmp_one(half %a, half %b) nounwind { ; RV32IZFH-LABEL: br_fcmp_one: ; RV32IZFH: # %bb.0: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: feq.h a0, fa1, fa1 -; RV32IZFH-NEXT: feq.h a1, fa0, fa0 -; RV32IZFH-NEXT: and a0, a1, a0 -; RV32IZFH-NEXT: feq.h a1, fa0, fa1 -; RV32IZFH-NEXT: not a1, a1 -; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: flt.h a1, fa1, fa0 +; RV32IZFH-NEXT: or a0, a1, a0 ; RV32IZFH-NEXT: bnez a0, .LBB7_2 ; RV32IZFH-NEXT: # %bb.1: # %if.else ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -282,12 +278,9 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: feq.h a0, fa1, fa1 -; RV64IZFH-NEXT: feq.h a1, fa0, fa0 -; RV64IZFH-NEXT: and a0, a1, a0 -; RV64IZFH-NEXT: feq.h a1, fa0, fa1 -; RV64IZFH-NEXT: not a1, a1 -; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: flt.h a1, fa1, fa0 +; RV64IZFH-NEXT: or a0, a1, a0 ; RV64IZFH-NEXT: bnez a0, .LBB7_2 ; RV64IZFH-NEXT: # %bb.1: # %if.else ; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -348,13 +341,11 @@ ; RV32IZFH: # %bb.0: ; RV32IZFH-NEXT: addi sp, sp, -16 ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: feq.h a0, fa0, fa1 -; RV32IZFH-NEXT: feq.h a1, fa1, fa1 -; RV32IZFH-NEXT: feq.h a2, fa0, fa0 -; RV32IZFH-NEXT: and 
a1, a2, a1 -; RV32IZFH-NEXT: xori a1, a1, 1 -; RV32IZFH-NEXT: or a0, a0, a1 -; RV32IZFH-NEXT: bnez a0, .LBB9_2 +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: flt.h a1, fa1, fa0 +; RV32IZFH-NEXT: or a0, a1, a0 +; RV32IZFH-NEXT: addi a1, zero, 1 +; RV32IZFH-NEXT: bne a0, a1, .LBB9_2 ; RV32IZFH-NEXT: # %bb.1: # %if.else ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 @@ -366,13 +357,11 @@ ; RV64IZFH: # %bb.0: ; RV64IZFH-NEXT: addi sp, sp, -16 ; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: feq.h a0, fa0, fa1 -; RV64IZFH-NEXT: feq.h a1, fa1, fa1 -; RV64IZFH-NEXT: feq.h a2, fa0, fa0 -; RV64IZFH-NEXT: and a1, a2, a1 -; RV64IZFH-NEXT: xori a1, a1, 1 -; RV64IZFH-NEXT: or a0, a0, a1 -; RV64IZFH-NEXT: bnez a0, .LBB9_2 +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: flt.h a1, fa1, fa0 +; RV64IZFH-NEXT: or a0, a1, a0 +; RV64IZFH-NEXT: addi a1, zero, 1 +; RV64IZFH-NEXT: bne a0, a1, .LBB9_2 ; RV64IZFH-NEXT: # %bb.1: # %if.else ; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZFH-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/half-fcmp.ll b/llvm/test/CodeGen/RISCV/half-fcmp.ll --- a/llvm/test/CodeGen/RISCV/half-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/half-fcmp.ll @@ -97,22 +97,16 @@ define i32 @fcmp_one(half %a, half %b) nounwind { ; RV32IZFH-LABEL: fcmp_one: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: feq.h a0, fa1, fa1 -; RV32IZFH-NEXT: feq.h a1, fa0, fa0 -; RV32IZFH-NEXT: and a0, a1, a0 -; RV32IZFH-NEXT: feq.h a1, fa0, fa1 -; RV32IZFH-NEXT: not a1, a1 -; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: flt.h a1, fa1, fa0 +; RV32IZFH-NEXT: or a0, a1, a0 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcmp_one: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa1, fa1 -; RV64IZFH-NEXT: feq.h a1, fa0, fa0 -; RV64IZFH-NEXT: and a0, a1, a0 -; RV64IZFH-NEXT: feq.h a1, fa0, fa1 -; RV64IZFH-NEXT: not a1, a1 -; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: flt.h a1, fa1, fa0 +; RV64IZFH-NEXT: or a0, a1, a0 ; RV64IZFH-NEXT: ret %1 = fcmp one half %a, %b %2 = zext i1 %1 to i32 @@ -141,22 +135,18 @@ define i32 @fcmp_ueq(half %a, half %b) nounwind { ; RV32IZFH-LABEL: fcmp_ueq: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: feq.h a0, fa0, fa1 -; RV32IZFH-NEXT: feq.h a1, fa1, fa1 -; RV32IZFH-NEXT: feq.h a2, fa0, fa0 -; RV32IZFH-NEXT: and a1, a2, a1 -; RV32IZFH-NEXT: xori a1, a1, 1 -; RV32IZFH-NEXT: or a0, a0, a1 +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: flt.h a1, fa1, fa0 +; RV32IZFH-NEXT: or a0, a1, a0 +; RV32IZFH-NEXT: xori a0, a0, 1 ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcmp_ueq: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa1 -; RV64IZFH-NEXT: feq.h a1, fa1, fa1 -; RV64IZFH-NEXT: feq.h a2, fa0, fa0 -; RV64IZFH-NEXT: and a1, a2, a1 -; RV64IZFH-NEXT: xori a1, a1, 1 -; RV64IZFH-NEXT: or a0, a0, a1 +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: flt.h a1, fa1, fa0 +; RV64IZFH-NEXT: or a0, a1, a0 +; RV64IZFH-NEXT: xori a0, a0, 1 ; RV64IZFH-NEXT: ret %1 = fcmp ueq half %a, %b %2 = zext i1 %1 to i32 diff --git a/llvm/test/CodeGen/RISCV/half-select-fcmp.ll b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll --- a/llvm/test/CodeGen/RISCV/half-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll @@ -135,15 +135,11 @@ } define half @select_fcmp_one(half %a, half %b) nounwind { -; TODO: feq.h+sltiu+bne sequence could be optimised ; RV32IZFH-LABEL: select_fcmp_one: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: feq.h a0, fa1, fa1 -; 
RV32IZFH-NEXT: feq.h a1, fa0, fa0 -; RV32IZFH-NEXT: and a0, a1, a0 -; RV32IZFH-NEXT: feq.h a1, fa0, fa1 -; RV32IZFH-NEXT: not a1, a1 -; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: flt.h a1, fa1, fa0 +; RV32IZFH-NEXT: or a0, a1, a0 ; RV32IZFH-NEXT: bnez a0, .LBB6_2 ; RV32IZFH-NEXT: # %bb.1: ; RV32IZFH-NEXT: fmv.h fa0, fa1 @@ -152,12 +148,9 @@ ; ; RV64IZFH-LABEL: select_fcmp_one: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa1, fa1 -; RV64IZFH-NEXT: feq.h a1, fa0, fa0 -; RV64IZFH-NEXT: and a0, a1, a0 -; RV64IZFH-NEXT: feq.h a1, fa0, fa1 -; RV64IZFH-NEXT: not a1, a1 -; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: flt.h a1, fa1, fa0 +; RV64IZFH-NEXT: or a0, a1, a0 ; RV64IZFH-NEXT: bnez a0, .LBB6_2 ; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fmv.h fa0, fa1 @@ -198,12 +191,10 @@ define half @select_fcmp_ueq(half %a, half %b) nounwind { ; RV32IZFH-LABEL: select_fcmp_ueq: ; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: feq.h a0, fa0, fa1 -; RV32IZFH-NEXT: feq.h a1, fa1, fa1 -; RV32IZFH-NEXT: feq.h a2, fa0, fa0 -; RV32IZFH-NEXT: and a1, a2, a1 -; RV32IZFH-NEXT: xori a1, a1, 1 -; RV32IZFH-NEXT: or a0, a0, a1 +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: flt.h a1, fa1, fa0 +; RV32IZFH-NEXT: or a0, a1, a0 +; RV32IZFH-NEXT: xori a0, a0, 1 ; RV32IZFH-NEXT: bnez a0, .LBB8_2 ; RV32IZFH-NEXT: # %bb.1: ; RV32IZFH-NEXT: fmv.h fa0, fa1 @@ -212,12 +203,10 @@ ; ; RV64IZFH-LABEL: select_fcmp_ueq: ; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: feq.h a0, fa0, fa1 -; RV64IZFH-NEXT: feq.h a1, fa1, fa1 -; RV64IZFH-NEXT: feq.h a2, fa0, fa0 -; RV64IZFH-NEXT: and a1, a2, a1 -; RV64IZFH-NEXT: xori a1, a1, 1 -; RV64IZFH-NEXT: or a0, a0, a1 +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: flt.h a1, fa1, fa0 +; RV64IZFH-NEXT: or a0, a1, a0 +; RV64IZFH-NEXT: xori a0, a0, 1 ; RV64IZFH-NEXT: bnez a0, .LBB8_2 ; RV64IZFH-NEXT: # %bb.1: ; RV64IZFH-NEXT: fmv.h fa0, fa1 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll @@ -304,12 +304,10 @@ ; CHECK-LABEL: fcmp_one_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vmfne.vv v25, v16, v18 -; CHECK-NEXT: vmfeq.vv v26, v18, v18 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vv v25, v16, v18 +; CHECK-NEXT: vmflt.vv v26, v18, v16 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v27, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %vc = fcmp one %va, %vb ret %vc @@ -320,13 +318,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v28, v26, fa0 -; CHECK-NEXT: vmfeq.vv v26, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v26, v28 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -339,13 +334,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v28, v26, fa0 
-; CHECK-NEXT: vmfeq.vv v26, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v28, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -457,12 +449,10 @@ ; CHECK-LABEL: fcmp_ueq_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vmfeq.vv v25, v16, v18 -; CHECK-NEXT: vmfne.vv v26, v18, v18 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vv v25, v16, v18 +; CHECK-NEXT: vmflt.vv v26, v18, v16 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v27, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %vc = fcmp ueq %va, %vb ret %vc @@ -473,13 +463,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v28, v26, fa0 -; CHECK-NEXT: vmfne.vv v26, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v28 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -492,13 +479,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v28, v26, fa0 -; CHECK-NEXT: vmfne.vv v26, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v28, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1221,12 +1205,10 @@ ; CHECK-LABEL: fcmp_one_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vmfne.vv v25, v16, v20 -; CHECK-NEXT: vmfeq.vv v26, v20, v20 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vv v25, v16, v20 +; CHECK-NEXT: vmflt.vv v26, v20, v16 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v27, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %vc = fcmp one %va, %vb ret %vc @@ -1236,13 +1218,10 @@ ; CHECK-LABEL: fcmp_one_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v28, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v27, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1254,13 +1233,10 @@ ; CHECK-LABEL: fcmp_one_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v28, fa0 -; CHECK-NEXT: 
vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v26, v27 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1368,12 +1344,10 @@ ; CHECK-LABEL: fcmp_ueq_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vmfeq.vv v25, v16, v20 -; CHECK-NEXT: vmfne.vv v26, v20, v20 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vv v25, v16, v20 +; CHECK-NEXT: vmflt.vv v26, v20, v16 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v27, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %vc = fcmp ueq %va, %vb ret %vc @@ -1383,13 +1357,10 @@ ; CHECK-LABEL: fcmp_ueq_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v28, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v27, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1401,13 +1372,10 @@ ; CHECK-LABEL: fcmp_ueq_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v28, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v27 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2122,12 +2090,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v25, v16, v8 -; CHECK-NEXT: vmfeq.vv v26, v16, v16 -; CHECK-NEXT: vmfeq.vv v27, v8, v8 +; CHECK-NEXT: vmflt.vv v25, v16, v8 +; CHECK-NEXT: vmflt.vv v26, v8, v16 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v26, v27 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %vc = fcmp one %va, %vb ret %vc @@ -2137,13 +2103,10 @@ ; CHECK-LABEL: fcmp_one_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v8, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v27, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2155,13 +2118,10 @@ ; CHECK-LABEL: fcmp_one_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v8, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; 
CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v26, v27 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2273,12 +2233,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfeq.vv v25, v16, v8 -; CHECK-NEXT: vmfne.vv v26, v16, v16 -; CHECK-NEXT: vmfne.vv v27, v8, v8 +; CHECK-NEXT: vmflt.vv v25, v16, v8 +; CHECK-NEXT: vmflt.vv v26, v8, v16 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v27 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %vc = fcmp ueq %va, %vb ret %vc @@ -2288,13 +2246,10 @@ ; CHECK-LABEL: fcmp_ueq_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v8, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v27, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2306,13 +2261,10 @@ ; CHECK-LABEL: fcmp_ueq_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v8, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v27 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll @@ -304,12 +304,10 @@ ; CHECK-LABEL: fcmp_one_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vmfne.vv v25, v16, v18 -; CHECK-NEXT: vmfeq.vv v26, v18, v18 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vv v25, v16, v18 +; CHECK-NEXT: vmflt.vv v26, v18, v16 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v27, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %vc = fcmp one %va, %vb ret %vc @@ -320,13 +318,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v28, v26, fa0 -; CHECK-NEXT: vmfeq.vv v26, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v26, v28 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -339,13 +334,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; 
CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v28, v26, fa0 -; CHECK-NEXT: vmfeq.vv v26, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v28, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -457,12 +449,10 @@ ; CHECK-LABEL: fcmp_ueq_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vmfeq.vv v25, v16, v18 -; CHECK-NEXT: vmfne.vv v26, v18, v18 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vv v25, v16, v18 +; CHECK-NEXT: vmflt.vv v26, v18, v16 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v27, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %vc = fcmp ueq %va, %vb ret %vc @@ -473,13 +463,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v28, v26, fa0 -; CHECK-NEXT: vmfne.vv v26, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v28 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -492,13 +479,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $f10_h killed $f10_h def $f10_f ; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu -; CHECK-NEXT: vfmv.v.f v26, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v28, v26, fa0 -; CHECK-NEXT: vmfne.vv v26, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v28, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, half %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1221,12 +1205,10 @@ ; CHECK-LABEL: fcmp_one_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vmfne.vv v25, v16, v20 -; CHECK-NEXT: vmfeq.vv v26, v20, v20 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vv v25, v16, v20 +; CHECK-NEXT: vmflt.vv v26, v20, v16 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v27, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %vc = fcmp one %va, %vb ret %vc @@ -1236,13 +1218,10 @@ ; CHECK-LABEL: fcmp_one_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v28, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v27, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1254,13 +1233,10 @@ ; CHECK-LABEL: fcmp_one_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: 
vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v28, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v26, v27 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1368,12 +1344,10 @@ ; CHECK-LABEL: fcmp_ueq_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vmfeq.vv v25, v16, v20 -; CHECK-NEXT: vmfne.vv v26, v20, v20 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vv v25, v16, v20 +; CHECK-NEXT: vmflt.vv v26, v20, v16 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v27, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %vc = fcmp ueq %va, %vb ret %vc @@ -1383,13 +1357,10 @@ ; CHECK-LABEL: fcmp_ueq_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v28, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v27, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -1401,13 +1372,10 @@ ; CHECK-LABEL: fcmp_ueq_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu -; CHECK-NEXT: vfmv.v.f v28, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v28, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v27 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, float %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2122,12 +2090,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v25, v16, v8 -; CHECK-NEXT: vmfeq.vv v26, v16, v16 -; CHECK-NEXT: vmfeq.vv v27, v8, v8 +; CHECK-NEXT: vmflt.vv v25, v16, v8 +; CHECK-NEXT: vmflt.vv v26, v8, v16 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v26, v27 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %vc = fcmp one %va, %vb ret %vc @@ -2137,13 +2103,10 @@ ; CHECK-LABEL: fcmp_one_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; CHECK-NEXT: vmfeq.vf v26, v8, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v27, v26 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2155,13 +2118,10 @@ ; CHECK-LABEL: fcmp_one_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfne.vf v25, v16, fa0 -; 
CHECK-NEXT: vmfeq.vf v26, v8, fa0 -; CHECK-NEXT: vmfeq.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v26, v26, v27 -; CHECK-NEXT: vmand.mm v0, v25, v26 +; CHECK-NEXT: vmor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2273,12 +2233,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfeq.vv v25, v16, v8 -; CHECK-NEXT: vmfne.vv v26, v16, v16 -; CHECK-NEXT: vmfne.vv v27, v8, v8 +; CHECK-NEXT: vmflt.vv v25, v16, v8 +; CHECK-NEXT: vmflt.vv v26, v8, v16 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v27 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %vc = fcmp ueq %va, %vb ret %vc @@ -2288,13 +2246,10 @@ ; CHECK-LABEL: fcmp_ueq_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v8, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmflt.vf v25, v16, fa0 +; CHECK-NEXT: vmfgt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v27, v26 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -2306,13 +2261,10 @@ ; CHECK-LABEL: fcmp_ueq_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vmfeq.vf v25, v16, fa0 -; CHECK-NEXT: vmfne.vf v26, v8, fa0 -; CHECK-NEXT: vmfne.vv v27, v16, v16 +; CHECK-NEXT: vmfgt.vf v25, v16, fa0 +; CHECK-NEXT: vmflt.vf v26, v16, fa0 ; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v26, v26, v27 -; CHECK-NEXT: vmor.mm v0, v25, v26 +; CHECK-NEXT: vmnor.mm v0, v26, v25 ; CHECK-NEXT: ret %head = insertelement undef, double %b, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/WebAssembly/comparisons-f32.ll b/llvm/test/CodeGen/WebAssembly/comparisons-f32.ll --- a/llvm/test/CodeGen/WebAssembly/comparisons-f32.ll +++ b/llvm/test/CodeGen/WebAssembly/comparisons-f32.ll @@ -102,16 +102,14 @@ ; CHECK-NEXT: .functype ueq_f32 (f32, f32) -> (i32){{$}} ; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}} ; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.eq $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} +; CHECK-NEXT: f32.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0{{$}} -; CHECK-NEXT: f32.ne $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; CHECK-NEXT: local.get $push[[L4:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: local.get $push[[L5:[0-9]+]]=, 1{{$}} -; CHECK-NEXT: f32.ne $push[[NUM2:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}} -; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}} -; CHECK-NEXT: return $pop[[NUM4]]{{$}} +; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}} +; CHECK-NEXT: f32.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} +; CHECK-NEXT: i32.or $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}} +; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1 +; CHECK-NEXT: i32.xor 
$push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $pop[[C0]]{{$}}
+; CHECK-NEXT: return $pop[[NUM3]]{{$}}
 define i32 @ueq_f32(float %x, float %y) {
 %a = fcmp ueq float %x, %y
 %b = zext i1 %a to i32
@@ -122,15 +120,11 @@
 ; CHECK-NEXT: .functype one_f32 (f32, f32) -> (i32){{$}}
 ; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}}
 ; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f32.ne $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
+; CHECK-NEXT: f32.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 ; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: f32.eq $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
-; CHECK-NEXT: local.get $push[[L4:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: local.get $push[[L5:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f32.eq $push[[NUM2:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
-; CHECK-NEXT: i32.and $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
-; CHECK-NEXT: i32.and $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}}
+; CHECK-NEXT: f32.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}}
 ; CHECK-NEXT: return $pop[[NUM4]]
 define i32 @one_f32(float %x, float %y) {
 %a = fcmp one float %x, %y
diff --git a/llvm/test/CodeGen/WebAssembly/comparisons-f64.ll b/llvm/test/CodeGen/WebAssembly/comparisons-f64.ll
--- a/llvm/test/CodeGen/WebAssembly/comparisons-f64.ll
+++ b/llvm/test/CodeGen/WebAssembly/comparisons-f64.ll
@@ -101,16 +101,14 @@
 ; CHECK-NEXT: .functype ueq_f64 (f64, f64) -> (i32){{$}}
 ; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}}
 ; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f64.eq $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
+; CHECK-NEXT: f64.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 ; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: f64.ne $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
-; CHECK-NEXT: local.get $push[[L4:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: local.get $push[[L5:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f64.ne $push[[NUM2:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
-; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
-; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
-; CHECK-NEXT: return $pop[[NUM4]]{{$}}
+; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}}
+; CHECK-NEXT: f64.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}}
+; CHECK-NEXT: i32.const $push[[C0:[0-9]+]]=, 1
+; CHECK-NEXT: i32.xor $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $pop[[C0]]{{$}}
+; CHECK-NEXT: return $pop[[NUM3]]{{$}}
 define i32 @ueq_f64(double %x, double %y) {
 %a = fcmp ueq double %x, %y
 %b = zext i1 %a to i32
@@ -121,15 +119,11 @@
 ; CHECK-NEXT: .functype one_f64 (f64, f64) -> (i32){{$}}
 ; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}}
 ; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f64.ne $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
+; CHECK-NEXT: f64.gt $push[[NUM0:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 ; CHECK-NEXT: local.get $push[[L2:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: f64.eq $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
-; CHECK-NEXT: local.get $push[[L4:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: local.get $push[[L5:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: f64.eq $push[[NUM2:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}}
-; CHECK-NEXT: i32.and $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
-; CHECK-NEXT: i32.and $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: local.get $push[[L3:[0-9]+]]=, 1{{$}}
+; CHECK-NEXT: f64.lt $push[[NUM1:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}}
 ; CHECK-NEXT: return $pop[[NUM4]]
 define i32 @one_f64(double %x, double %y) {
 %a = fcmp one double %x, %y
diff --git a/llvm/test/CodeGen/WebAssembly/simd-comparisons.ll b/llvm/test/CodeGen/WebAssembly/simd-comparisons.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-comparisons.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-comparisons.ll
@@ -1000,11 +1000,9 @@
 ; CHECK-LABEL: compare_one_v4f32:
 ; NO-SIMD128-NOT: f32x4
 ; SIMD128-NEXT: .functype compare_one_v4f32 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f32x4.ne $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f32x4.eq $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f32x4.eq $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.and $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f32x4.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f32x4.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <4 x i1> @compare_one_v4f32 (<4 x float> %x, <4 x float> %y) {
 %res = fcmp one <4 x float> %x, %y
@@ -1024,11 +1022,9 @@
 ; CHECK-LABEL: compare_sext_one_v4f32:
 ; NO-SIMD128-NOT: f32x4
 ; SIMD128-NEXT: .functype compare_sext_one_v4f32 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f32x4.ne $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f32x4.eq $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f32x4.eq $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.and $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f32x4.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f32x4.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <4 x i32> @compare_sext_one_v4f32 (<4 x float> %x, <4 x float> %y) {
 %cmp = fcmp one <4 x float> %x, %y
@@ -1100,11 +1096,10 @@
 ; CHECK-LABEL: compare_ueq_v4f32:
 ; NO-SIMD128-NOT: f32x4
 ; SIMD128-NEXT: .functype compare_ueq_v4f32 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f32x4.eq $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f32x4.ne $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f32x4.ne $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.or $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f32x4.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f32x4.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $pop[[T2]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <4 x i1> @compare_ueq_v4f32 (<4 x float> %x, <4 x float> %y) {
 %res = fcmp ueq <4 x float> %x, %y
@@ -1124,11 +1119,10 @@
 ; CHECK-LABEL: compare_sext_ueq_v4f32:
 ; NO-SIMD128-NOT: f32x4
 ; SIMD128-NEXT: .functype compare_sext_ueq_v4f32 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f32x4.eq $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f32x4.ne $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f32x4.ne $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.or $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f32x4.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f32x4.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $pop[[T2]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]
 define <4 x i32> @compare_sext_ueq_v4f32 (<4 x float> %x, <4 x float> %y) {
 %cmp = fcmp ueq <4 x float> %x, %y
@@ -1628,11 +1622,9 @@
 ; CHECK-LABEL: compare_one_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-NEXT: .functype compare_one_v2f64 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f64x2.ne $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f64x2.eq $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f64x2.eq $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.and $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f64x2.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f64x2.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <2 x i1> @compare_one_v2f64 (<2 x double> %x, <2 x double> %y) {
 %res = fcmp one <2 x double> %x, %y
@@ -1652,11 +1644,9 @@
 ; CHECK-LABEL: compare_sext_one_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-NEXT: .functype compare_sext_one_v2f64 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f64x2.ne $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f64x2.eq $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f64x2.eq $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.and $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f64x2.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f64x2.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <2 x i64> @compare_sext_one_v2f64 (<2 x double> %x, <2 x double> %y) {
 %cmp = fcmp one <2 x double> %x, %y
@@ -1728,11 +1718,10 @@
 ; CHECK-LABEL: compare_ueq_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-NEXT: .functype compare_ueq_v2f64 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f64x2.eq $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f64x2.ne $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f64x2.ne $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.or $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f64x2.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f64x2.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $pop[[T2]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <2 x i1> @compare_ueq_v2f64 (<2 x double> %x, <2 x double> %y) {
 %res = fcmp ueq <2 x double> %x, %y
@@ -1752,11 +1741,10 @@
 ; CHECK-LABEL: compare_sext_ueq_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-NEXT: .functype compare_sext_ueq_v2f64 (v128, v128) -> (v128){{$}}
-; SIMD128-NEXT: f64x2.eq $push[[T0:[0-9]+]]=, $0, $1{{$}}
-; SIMD128-NEXT: f64x2.ne $push[[T1:[0-9]+]]=, $0, $0{{$}}
-; SIMD128-NEXT: f64x2.ne $push[[T2:[0-9]+]]=, $1, $1{{$}}
-; SIMD128-NEXT: v128.or $push[[T3:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}}
-; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T3]]{{$}}
+; SIMD128-NEXT: f64x2.gt $push[[T0:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: f64x2.lt $push[[T1:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: v128.or $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}}
+; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $pop[[T2]]{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <2 x i64> @compare_sext_ueq_v2f64 (<2 x double> %x, <2 x double> %y) {
 %cmp = fcmp ueq <2 x double> %x, %y