Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2085,14 +2085,19 @@
     case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
                                              OpVT);
     }
-  } else if (N1CFP && OpVT.isSimple()) {
+  } else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) {
     // Ensure that the constant occurs on the RHS.
     ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
     if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT()))
       return SDValue();
     return getSetCC(dl, VT, N2, N1, SwappedCond);
-  } else if (N2CFP && N2CFP->getValueAPF().isNaN()) {
-    // If an operand is known to be a nan, we can fold it.
+  } else if ((N2CFP && N2CFP->getValueAPF().isNaN()) ||
+             (OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) {
+    // If an operand is known to be a NaN (or an undef that could be a NaN),
+    // we can fold it.
+    // Choosing NaN for the undef will always make unordered comparisons
+    // succeed and ordered comparisons fail.
+    // Matches the behavior of llvm::ConstantFoldCompareInstruction.
     switch (ISD::getUnorderedFlavor(Cond)) {
     default: llvm_unreachable("Unknown flavor!");
     case 0:  // Known false.
Index: llvm/trunk/test/CodeGen/AArch64/half.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/half.ll
+++ llvm/trunk/test/CodeGen/AArch64/half.ll
@@ -96,24 +96,27 @@
   ret void
 }
 
-define i16 @test_fccmp(i1 %a) {
+define i16 @test_fccmp(i1 %a, i16 %in) {
 ; CHECK-LABEL: test_fccmp:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #24576
+; CHECK-NEXT: fmov s0, w1
 ; CHECK-NEXT: movk w8, #15974, lsl #16
 ; CHECK-NEXT: mov w9, #16384
+; CHECK-NEXT: fcvt s0, h0
 ; CHECK-NEXT: movk w9, #15428, lsl #16
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcmp s0, s0
-; CHECK-NEXT: fmov s0, w9
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: fmov s1, w9
 ; CHECK-NEXT: cset w8, pl
-; CHECK-NEXT: fccmp s0, s0, #8, pl
+; CHECK-NEXT: fccmp s0, s1, #8, pl
 ; CHECK-NEXT: mov w9, #4
 ; CHECK-NEXT: csinc w9, w9, wzr, mi
 ; CHECK-NEXT: add w0, w8, w9
 ; CHECK-NEXT: ret
-  %cmp0 = fcmp ogt half 0xH3333, undef
-  %cmp1 = fcmp ogt half 0xH2222, undef
+  %f16 = bitcast i16 %in to half
+  %cmp0 = fcmp ogt half 0xH3333, %f16
+  %cmp1 = fcmp ogt half 0xH2222, %f16
   %x = select i1 %cmp0, i16 0, i16 1
   %or = or i1 %cmp1, %cmp0
   %y = select i1 %or, i16 4, i16 1
Index: llvm/trunk/test/CodeGen/ARM/fcmp-xo.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/fcmp-xo.ll
+++ llvm/trunk/test/CodeGen/ARM/fcmp-xo.ll
@@ -2,34 +2,35 @@
 ; RUN: llc < %s -mtriple=thumbv7m-arm-none-eabi -mattr=+execute-only,+fp-armv8 | FileCheck %s --check-prefixes=CHECK,VMOVSR
 ; RUN: llc < %s -mtriple=thumbv7m-arm-none-eabi -mattr=+execute-only,+fp-armv8,+neon,+neonfp | FileCheck %s --check-prefixes=CHECK,NEON
 
-define arm_aapcs_vfpcc float @foo0() local_unnamed_addr {
+define arm_aapcs_vfpcc float @foo0(float %a0) local_unnamed_addr {
 ; CHECK-LABEL: foo0:
 ; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.f32 s0, #5.000000e-01
-; CHECK-NEXT: vmov.f32 s2, #-5.000000e-01
 ; CHECK-NEXT: vcmpe.f32 s0, #0
+; CHECK-NEXT: vmov.f32 s2, #5.000000e-01
 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov.f32 s4, #-5.000000e-01
 ; CHECK-NEXT: it mi
-; CHECK-NEXT: vmovmi.f32 s0, s2
+; CHECK-NEXT: vmovmi.f32 s2, s4
+; CHECK-NEXT: vmov.f32 s0, s2
 ; CHECK-NEXT: bx lr
-  %1 = fcmp nsz olt float undef, 0.000000e+00
+  %1 = fcmp nsz olt float %a0, 0.000000e+00
   %2 = select i1 %1, float -5.000000e-01, float 5.000000e-01
   ret float %2
 }
 
-define arm_aapcs_vfpcc float @float1() local_unnamed_addr {
+define arm_aapcs_vfpcc float @float1(float %a0) local_unnamed_addr {
 ; CHECK-LABEL: float1:
 ; CHECK: @ %bb.0: @ %.end
-; CHECK-NEXT: vmov.f32 s0, #1.000000e+00
-; CHECK-NEXT: vmov.f32 s2, #5.000000e-01
-; CHECK-NEXT: vmov.f32 s4, #-5.000000e-01
-; CHECK-NEXT: vcmpe.f32 s0, s0
+; CHECK-NEXT: vmov.f32 s2, #1.000000e+00
+; CHECK-NEXT: vmov.f32 s4, #5.000000e-01
+; CHECK-NEXT: vmov.f32 s6, #-5.000000e-01
+; CHECK-NEXT: vcmpe.f32 s2, s0
 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s4, s2
+; CHECK-NEXT: vselgt.f32 s0, s6, s4
 ; CHECK-NEXT: bx lr
   br i1 undef, label %.end, label %1
 
-  %2 = fcmp nsz olt float undef, 1.000000e+00
+  %2 = fcmp nsz olt float %a0, 1.000000e+00
   %3 = select i1 %2, float -5.000000e-01, float 5.000000e-01
   br label %.end
 
@@ -38,62 +38,62 @@
   ret float %4
 }
 
-define arm_aapcs_vfpcc float @float128() local_unnamed_addr {
+define arm_aapcs_vfpcc float @float128(float %a0) local_unnamed_addr {
 ; VMOVSR-LABEL: float128:
 ; VMOVSR: @ %bb.0:
 ; VMOVSR-NEXT: mov.w r0, #1124073472
-; VMOVSR-NEXT: vmov.f32 s2, #5.000000e-01
-; VMOVSR-NEXT: vmov s0, r0
-; VMOVSR-NEXT: vmov.f32 s4, #-5.000000e-01
-; VMOVSR-NEXT: vcmpe.f32 s0, s0
+; VMOVSR-NEXT: vmov.f32 s4, #5.000000e-01
+; VMOVSR-NEXT: vmov s2, r0
+; VMOVSR-NEXT: vmov.f32 s6, #-5.000000e-01
+; VMOVSR-NEXT: vcmpe.f32 s2, s0
 ; VMOVSR-NEXT: vmrs APSR_nzcv, fpscr
-; VMOVSR-NEXT: vselgt.f32 s0, s4, s2
+; VMOVSR-NEXT: vselgt.f32 s0, s6, s4
 ; VMOVSR-NEXT: bx lr
 ;
 ; NEON-LABEL: float128:
 ; NEON: @ %bb.0:
-; NEON-NEXT: vmov.f32 s0, #5.000000e-01
 ; NEON-NEXT: mov.w r0, #1124073472
-; NEON-NEXT: vmov d2, r0, r0
-; NEON-NEXT: vmov.f32 s2, #-5.000000e-01
-; NEON-NEXT: vcmpe.f32 s4, s0
+; NEON-NEXT: vmov.f32 s2, #5.000000e-01
+; NEON-NEXT: vmov d3, r0, r0
+; NEON-NEXT: vmov.f32 s4, #-5.000000e-01
+; NEON-NEXT: vcmpe.f32 s6, s0
 ; NEON-NEXT: vmrs APSR_nzcv, fpscr
-; NEON-NEXT: vselgt.f32 s0, s2, s0
+; NEON-NEXT: vselgt.f32 s0, s4, s2
 ; NEON-NEXT: bx lr
-  %1 = fcmp nsz olt float undef, 128.000000e+00
+  %1 = fcmp nsz olt float %a0, 128.000000e+00
   %2 = select i1 %1, float -5.000000e-01, float 5.000000e-01
   ret float %2
 }
 
-define arm_aapcs_vfpcc double @double1() local_unnamed_addr {
+define arm_aapcs_vfpcc double @double1(double %a0) local_unnamed_addr {
 ; CHECK-LABEL: double1:
 ; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.f64 d16, #5.000000e-01
 ; CHECK-NEXT: vmov.f64 d18, #1.000000e+00
-; CHECK-NEXT: vcmpe.f64 d18, d16
+; CHECK-NEXT: vcmpe.f64 d18, d0
 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov.f64 d16, #5.000000e-01
 ; CHECK-NEXT: vmov.f64 d17, #-5.000000e-01
 ; CHECK-NEXT: vselgt.f64 d0, d17, d16
 ; CHECK-NEXT: bx lr
-  %1 = fcmp nsz olt double undef, 1.000000e+00
+  %1 = fcmp nsz olt double %a0, 1.000000e+00
   %2 = select i1 %1, double -5.000000e-01, double 5.000000e-01
   ret double %2
 }
 
-define arm_aapcs_vfpcc double @double128() local_unnamed_addr {
+define arm_aapcs_vfpcc double @double128(double %a0) local_unnamed_addr {
 ; CHECK-LABEL: double128:
 ; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.f64 d16, #5.000000e-01
 ; CHECK-NEXT: movs r0, #0
-; CHECK-NEXT: movt r0, #16480
 ; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: movt r0, #16480
+; CHECK-NEXT: vmov.f64 d16, #5.000000e-01
 ; CHECK-NEXT: vmov d18, r1, r0
-; CHECK-NEXT: vcmpe.f64 d18, d16
+; CHECK-NEXT: vcmpe.f64 d18, d0
 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-NEXT: vmov.f64 d17, #-5.000000e-01
 ; CHECK-NEXT: vselgt.f64 d0, d17, d16
 ; CHECK-NEXT: bx lr
-  %1 = fcmp nsz olt double undef, 128.000000e+00
+  %1 = fcmp nsz olt double %a0, 128.000000e+00
   %2 = select i1 %1, double -5.000000e-01, double 5.000000e-01
   ret double %2
 }
Index: llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll
+++ llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll
@@ -701,22 +701,23 @@
 ; 34. VRINTZ
 
 ; 35. VSELEQ
-define half @select_cc1() {
-  %1 = fcmp nsz oeq half undef, 0xH0001
-  %2 = select i1 %1, half 0xHC000, half 0xH0002
-  ret half %2
+define half @select_cc1(half* %a0) {
+  %1 = load half, half* %a0
+  %2 = fcmp nsz oeq half %1, 0xH0001
+  %3 = select i1 %2, half 0xHC000, half 0xH0002
+  ret half %3
 
 ; CHECK-LABEL: select_cc1:
 
-; CHECK-HARDFP-FULLFP16: vcmp.f16 s0, s0
-; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-HARDFP-FULLFP16: vcmp.f16 s6, s0
+; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-HARDFP-FULLFP16: vseleq.f16 s0, s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-A32: vcmp.f32 s0, s0
+; CHECK-SOFTFP-FP16-A32: vcmp.f32 s6, s0
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmoveq.f32 s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-T32: vcmp.f32 s0, s0
+; CHECK-SOFTFP-FP16-T32: vcmp.f32 s6, s0
 ; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-T32: it eq
 ; CHECK-SOFTFP-FP16-T32: vmoveq.f32 s{{.}}, s{{.}}
@@ -727,170 +728,178 @@
 ; be encoded as an FP16 immediate need to be added here.
 ;
 ; 36. VSELGE
-define half @select_cc_ge1() {
-  %1 = fcmp nsz oge half undef, 0xH0001
-  %2 = select i1 %1, half 0xHC000, half 0xH0002
-  ret half %2
+define half @select_cc_ge1(half* %a0) {
+  %1 = load half, half* %a0
+  %2 = fcmp nsz oge half %1, 0xH0001
+  %3 = select i1 %2, half 0xHC000, half 0xH0002
+  ret half %3
 
 ; CHECK-LABEL: select_cc_ge1:
 
-; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
-; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
+; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovge.f32 s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-T32-NEXT: it ge
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovge.f32 s{{.}}, s{{.}}
 }
 
-define half @select_cc_ge2() {
-  %1 = fcmp nsz ole half undef, 0xH0001
-  %2 = select i1 %1, half 0xHC000, half 0xH0002
-  ret half %2
+define half @select_cc_ge2(half* %a0) {
+  %1 = load half, half* %a0
+  %2 = fcmp nsz ole half %1, 0xH0001
+  %3 = select i1 %2, half 0xHC000, half 0xH0002
+  ret half %3
 
 ; CHECK-LABEL: select_cc_ge2:
 
-; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
+; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovls.f32 s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-T32-NEXT: it ls
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovls.f32 s{{.}}, s{{.}}
 }
 
-define half @select_cc_ge3() {
-  %1 = fcmp nsz ugt half undef, 0xH0001
-  %2 = select i1 %1, half 0xHC000, half 0xH0002
-  ret half %2
+define half @select_cc_ge3(half* %a0) {
+  %1 = load half, half* %a0
+  %2 = fcmp nsz ugt half %1, 0xH0001
+  %3 = select i1 %2, half 0xHC000, half 0xH0002
+  ret half %3
 
 ; CHECK-LABEL: select_cc_ge3:
 
-; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
+; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-T32-NEXT: it hi
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovhi.f32 s{{.}}, s{{.}}
 }
 
-define half @select_cc_ge4() {
-  %1 = fcmp nsz ult half undef, 0xH0001
-  %2 = select i1 %1, half 0xHC000, half 0xH0002
-  ret half %2
+define half @select_cc_ge4(half* %a0) {
+  %1 = load half, half* %a0
+  %2 = fcmp nsz ult half %1, 0xH0001
+  %3 = select i1 %2, half 0xHC000, half 0xH0002
+  ret half %3
 
 ; CHECK-LABEL: select_cc_ge4:
 
-; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
+; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-HARDFP-FULLFP16-NEXT: vselge.f16 s0, s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-T32-NEXT: it lt
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovlt.f32 s{{.}}, s{{.}}
 }
 
 ; 37. VSELGT
-define half @select_cc_gt1() {
-  %1 = fcmp nsz ogt half undef, 0xH0001
-  %2 = select i1 %1, half 0xHC000, half 0xH0002
-  ret half %2
+define half @select_cc_gt1(half* %a0) {
+  %1 = load half, half* %a0
+  %2 = fcmp nsz ogt half %1, 0xH0001
+  %3 = select i1 %2, half 0xHC000, half 0xH0002
+  ret half %3
 
 ; CHECK-LABEL: select_cc_gt1:
 
-; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
+; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-T32-NEXT: it gt
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovgt.f32 s{{.}}, s{{.}}
 }
 
-define half @select_cc_gt2() {
-  %1 = fcmp nsz uge half undef, 0xH0001
-  %2 = select i1 %1, half 0xHC000, half 0xH0002
-  ret half %2
+define half @select_cc_gt2(half* %a0) {
+  %1 = load half, half* %a0
+  %2 = fcmp nsz uge half %1, 0xH0001
+  %3 = select i1 %2, half 0xHC000, half 0xH0002
+  ret half %3
 
; CHECK-LABEL: select_cc_gt2:
 
-; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
+; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-T32-NEXT: it pl
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovpl.f32 s{{.}}, s{{.}}
 }
 
-define half @select_cc_gt3() {
-  %1 = fcmp nsz ule half undef, 0xH0001
-  %2 = select i1 %1, half 0xHC000, half 0xH0002
-  ret half %2
+define half @select_cc_gt3(half* %a0) {
+  %1 = load half, half* %a0
+  %2 = fcmp nsz ule half %1, 0xH0001
+  %3 = select i1 %2, half 0xHC000, half 0xH0002
+  ret half %3
 
 ; CHECK-LABEL: select_cc_gt3:
 
-; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
+; CHECK-HARDFP-FULLFP16: vcmpe.f16 s6, s0
 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovle.f32 s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-T32-NEXT: it le
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovle.f32 s{{.}}, s{{.}}
 }
 
-define half @select_cc_gt4() {
-  %1 = fcmp nsz olt half undef, 0xH0001
-  %2 = select i1 %1, half 0xHC000, half 0xH0002
-  ret half %2
+define half @select_cc_gt4(half* %a0) {
+  %1 = load half, half* %a0
+  %2 = fcmp nsz olt half %1, 0xH0001
+  %3 = select i1 %2, half 0xHC000, half 0xH0002
+  ret half %3
 
 ; CHECK-LABEL: select_cc_gt4:
 
-; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s0
+; CHECK-HARDFP-FULLFP16: vcmpe.f16 s0, s6
 ; CHECK-HARDFP-FULLFP16-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-HARDFP-FULLFP16-NEXT: vselgt.f16 s0, s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-A32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-A32-NEXT: vmovmi.f32 s{{.}}, s{{.}}
 
-; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s0, s0
+; CHECK-SOFTFP-FP16-T32: vcmpe.f32 s6, s0
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmrs APSR_nzcv, fpscr
 ; CHECK-SOFTFP-FP16-T32-NEXT: it mi
 ; CHECK-SOFTFP-FP16-T32-NEXT: vmovmi.f32 s{{.}}, s{{.}}
Index: llvm/trunk/test/CodeGen/ARM/vcge.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/vcge.ll
+++ llvm/trunk/test/CodeGen/ARM/vcge.ll
@@ -279,9 +279,10 @@
 ; Radar 8782191
 ; Floating-point comparisons against zero produce results with integer
 ; elements, not floating-point elements.
-define void @test_vclez_fp() nounwind optsize {
+define void @test_vclez_fp(<4 x float>* %A) nounwind optsize {
 ; CHECK-LABEL: test_vclez_fp:
 ; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
 ; CHECK-NEXT: vcle.f32 q8, q8, #0
 ; CHECK-NEXT: vmovn.i32 d16, q8
 ; CHECK-NEXT: vmov.i8 d17, #0x1
@@ -289,7 +290,8 @@
 ; CHECK-NEXT: vadd.i8 d16, d16, d17
 ; CHECK-NEXT: vst1.8 {d16}, [r0]
 entry:
-  %0 = fcmp ole <4 x float> undef, zeroinitializer
+  %ld = load <4 x float>, <4 x float>* %A
+  %0 = fcmp ole <4 x float> %ld, zeroinitializer
   %1 = sext <4 x i1> %0 to <4 x i16>
   %2 = add <4 x i16> %1, zeroinitializer
   %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> 
Index: llvm/trunk/test/CodeGen/SPARC/fp128.ll
===================================================================
--- llvm/trunk/test/CodeGen/SPARC/fp128.ll
+++ llvm/trunk/test/CodeGen/SPARC/fp128.ll
@@ -73,10 +73,11 @@
 ; SOFT: _Q_cmp
 ; SOFT: cmp
 
-define i32 @f128_compare2() {
+define i32 @f128_compare2(fp128* byval %f0) {
 entry:
-  %0 = fcmp ogt fp128 undef, 0xL00000000000000000000000000000000
-  br i1 %0, label %"5", label %"7"
+  %0 = load fp128, fp128* %f0, align 8
+  %1 = fcmp ogt fp128 %0, 0xL00000000000000000000000000000000
+  br i1 %1, label %"5", label %"7"
 
 "5": ; preds = %entry
   ret i32 0
Index: llvm/trunk/test/CodeGen/X86/block-placement.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/block-placement.ll
+++ llvm/trunk/test/CodeGen/X86/block-placement.ll
@@ -469,7 +469,7 @@
   ret i32 %merge
 }
 
-define void @fpcmp_unanalyzable_branch(i1 %cond) {
+define void @fpcmp_unanalyzable_branch(i1 %cond, double %a0) {
 ; This function's CFG contains an once-unanalyzable branch (une on floating
 ; points). As now it becomes analyzable, we should get best layout in which each
 ; edge in 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end' is
@@ -498,7 +498,7 @@
   br i1 undef, label %if.end, label %exit
 
 exit:
-  %cmp.i = fcmp une double 0.000000e+00, undef
+  %cmp.i = fcmp une double 0.000000e+00, %a0
   br i1 %cmp.i, label %if.then, label %if.end, !prof !3
 
 if.then:
@@ -641,7 +641,7 @@
   br label %loop1
 }
 
-define void @unanalyzable_branch_to_loop_header() {
+define void @unanalyzable_branch_to_loop_header(double %a0) {
 ; Ensure that we can handle unanalyzable branches into loop headers. We
 ; pre-form chains for unanalyzable branches, and will find the tail end of that
 ; at the start of the loop. This function uses floating point comparison
@@ -654,7 +654,7 @@
 ; CHECK: %exit
 
 entry:
-  %cmp = fcmp une double 0.000000e+00, undef
+  %cmp = fcmp une double 0.000000e+00, %a0
   br i1 %cmp, label %loop, label %exit
 
 loop:
@@ -665,7 +665,7 @@
   ret void
 }
 
-define void @unanalyzable_branch_to_best_succ(i1 %cond) {
+define void @unanalyzable_branch_to_best_succ(i1 %cond, double %a0) {
 ; Ensure that we can handle unanalyzable branches where the destination block
 ; gets selected as the optimal successor to merge.
 ;
@@ -683,7 +683,7 @@
   br i1 %cond, label %bar, label %foo, !prof !1
 
 foo:
-  %cmp = fcmp une double 0.000000e+00, undef
+  %cmp = fcmp une double 0.000000e+00, %a0
   br i1 %cmp, label %bar, label %exit
 
 bar:
@@ -713,7 +713,7 @@
   br label %c
 
 b:
-  %cmp = fcmp une float %x, undef
+  %cmp = fcmp une float %x, 0.0
   br i1 %cmp, label %c, label %exit
 
 c:
@@ -736,199 +736,199 @@
   br label %0
 
   %val0 = load volatile float, float* undef
-  %cmp0 = fcmp une float %val0, undef
+  %cmp0 = fcmp une float %val0, 0.0
   br i1 %cmp0, label %1, label %0
   %val1 = load volatile float, float* undef
-  %cmp1 = fcmp une float %val1, undef
+  %cmp1 = fcmp une float %val1, 0.0
   br i1 %cmp1, label %2, label %1
   %val2 = load volatile float, float* undef
-  %cmp2 = fcmp une float %val2, undef
+  %cmp2 = fcmp une float %val2, 0.0
   br i1 %cmp2, label %3, label %2
   %val3 = load volatile float, float* undef
-  %cmp3 = fcmp une float %val3, undef
+  %cmp3 = fcmp une float %val3, 0.0
   br i1 %cmp3, label %4, label %3
   %val4 = load volatile float, float* undef
-  %cmp4 = fcmp une float %val4, undef
+  %cmp4 = fcmp une float %val4, 0.0
   br i1 %cmp4, label %5, label %4
   %val5 = load volatile float, float* undef
-  %cmp5 = fcmp une float %val5, undef
+  %cmp5 = fcmp une float %val5, 0.0
   br i1 %cmp5, label %6, label %5
   %val6 = load volatile float, float* undef
-  %cmp6 = fcmp une float %val6, undef
+  %cmp6 = fcmp une float %val6, 0.0
   br i1 %cmp6, label %7, label %6
   %val7 = load volatile float, float* undef
-  %cmp7 = fcmp une float %val7, undef
+  %cmp7 = fcmp une float %val7, 0.0
   br i1 %cmp7, label %8, label %7
   %val8 = load volatile float, float* undef
-  %cmp8 = fcmp une float %val8, undef
+  %cmp8 = fcmp une float %val8, 0.0
   br i1 %cmp8, label %9, label %8
   %val9 = load volatile float, float* undef
-  %cmp9 = fcmp une float %val9, undef
+  %cmp9 = fcmp une float %val9, 0.0
   br i1 %cmp9, label %10, label %9
   %val10 = load volatile float, float* undef
-  %cmp10 = fcmp une float %val10, undef
+  %cmp10 = fcmp une float %val10, 0.0
   br i1 %cmp10, label %11, label %10
   %val11 = load volatile float, float* undef
-  %cmp11 = fcmp une float %val11, undef
+  %cmp11 = fcmp une float %val11, 0.0
   br i1 %cmp11, label %12, label %11
   %val12 = load volatile float, float* undef
-  %cmp12 = fcmp une float %val12, undef
+  %cmp12 = fcmp une float %val12, 0.0
   br i1 %cmp12, label %13, label %12
   %val13 = load volatile float, float* undef
-  %cmp13 = fcmp une float %val13, undef
+  %cmp13 = fcmp une float %val13, 0.0
   br i1 %cmp13, label %14, label %13
   %val14 = load volatile float, float* undef
-  %cmp14 = fcmp une float %val14, undef
+  %cmp14 = fcmp une float %val14, 0.0
   br i1 %cmp14, label %15, label %14
   %val15 = load volatile float, float* undef
-  %cmp15 = fcmp une float %val15, undef
+  %cmp15 = fcmp une float %val15, 0.0
   br i1 %cmp15, label %16, label %15
   %val16 = load volatile float, float* undef
-  %cmp16 = fcmp une float %val16, undef
+  %cmp16 = fcmp une float %val16, 0.0
   br i1 %cmp16, label %17, label %16
   %val17 = load volatile float, float* undef
-  %cmp17 = fcmp une float %val17, undef
+  %cmp17 = fcmp une float %val17, 0.0
   br i1 %cmp17, label %18, label %17
   %val18 = load volatile float, float* undef
-  %cmp18 = fcmp une float %val18, undef
+  %cmp18 = fcmp une float %val18, 0.0
   br i1 %cmp18, label %19, label %18
   %val19 = load volatile float, float* undef
-  %cmp19 = fcmp une float %val19, undef
+  %cmp19 = fcmp une float %val19, 0.0
   br i1 %cmp19, label %20, label %19
   %val20 = load volatile float, float* undef
-  %cmp20 = fcmp une float %val20, undef
+  %cmp20 = fcmp une float %val20, 0.0
   br i1 %cmp20, label %21, label %20
   %val21 = load volatile float, float* undef
-  %cmp21 = fcmp une float %val21, undef
+  %cmp21 = fcmp une float %val21, 0.0
   br i1 %cmp21, label %22, label %21
   %val22 = load volatile float, float* undef
-  %cmp22 = fcmp une float %val22, undef
+  %cmp22 = fcmp une float %val22, 0.0
   br i1 %cmp22, label %23, label %22
   %val23 = load volatile float, float* undef
-  %cmp23 = fcmp une float %val23, undef
+  %cmp23 = fcmp une float %val23, 0.0
   br i1 %cmp23, label %24, label %23
   %val24 = load volatile float, float* undef
-  %cmp24 = fcmp une float %val24, undef
+  %cmp24 = fcmp une float %val24, 0.0
   br i1 %cmp24, label %25, label %24
   %val25 = load volatile float, float* undef
-  %cmp25 = fcmp une float %val25, undef
+  %cmp25 = fcmp une float %val25, 0.0
   br i1 %cmp25, label %26, label %25
   %val26 = load volatile float, float* undef
-  %cmp26 = fcmp une float %val26, undef
+  %cmp26 = fcmp une float %val26, 0.0
   br i1 %cmp26, label %27, label %26
   %val27 = load volatile float, float* undef
-  %cmp27 = fcmp une float %val27, undef
+  %cmp27 = fcmp une float %val27, 0.0
   br i1 %cmp27, label %28, label %27
   %val28 = load volatile float, float* undef
-  %cmp28 = fcmp une float %val28, undef
+  %cmp28 = fcmp une float %val28, 0.0
   br i1 %cmp28, label %29, label %28
   %val29 = load volatile float, float* undef
-  %cmp29 = fcmp une float %val29, undef
+  %cmp29 = fcmp une float %val29, 0.0
   br i1 %cmp29, label %30, label %29
   %val30 = load volatile float, float* undef
-  %cmp30 = fcmp une float %val30, undef
+  %cmp30 = fcmp une float %val30, 0.0
   br i1 %cmp30, label %31, label %30
   %val31 = load volatile float, float* undef
-  %cmp31 = fcmp une float %val31, undef
+  %cmp31 = fcmp une float %val31, 0.0
   br i1 %cmp31, label %32, label %31
   %val32 = load volatile float, float* undef
-  %cmp32 = fcmp une float %val32, undef
+  %cmp32 = fcmp une float %val32, 0.0
   br i1 %cmp32, label %33, label %32
   %val33 = load volatile float, float* undef
-  %cmp33 = fcmp une float %val33, undef
+  %cmp33 = fcmp une float %val33, 0.0
   br i1 %cmp33, label %34, label %33
   %val34 = load volatile float, float* undef
-  %cmp34 = fcmp une float %val34, undef
+  %cmp34 = fcmp une float %val34, 0.0
   br i1 %cmp34, label %35, label %34
   %val35 = load volatile float, float* undef
-  %cmp35 = fcmp une float %val35, undef
+  %cmp35 = fcmp une float %val35, 0.0
   br i1 %cmp35, label %36, label %35
   %val36 = load volatile float, float* undef
-  %cmp36 = fcmp une float %val36, undef
+  %cmp36 = fcmp une float %val36, 0.0
   br i1 %cmp36, label %37, label %36
   %val37 = load volatile float, float* undef
-  %cmp37 = fcmp une float %val37, undef
+  %cmp37 = fcmp une float %val37, 0.0
   br i1 %cmp37, label %38, label %37
   %val38 = load volatile float, float* undef
-  %cmp38 = fcmp une float %val38, undef
+  %cmp38 = fcmp une float %val38, 0.0
   br i1 %cmp38, label %39, label %38
   %val39 = load volatile float, float* undef
-  %cmp39 = fcmp une float %val39, undef
+  %cmp39 = fcmp une float %val39, 0.0
   br i1 %cmp39, label %40, label %39
   %val40 = load volatile float, float* undef
-  %cmp40 = fcmp une float %val40, undef
+  %cmp40 = fcmp une float %val40, 0.0
   br i1 %cmp40, label %41, label %40
   %val41 = load volatile float, float* undef
   %cmp41 = fcmp une float %val41, undef
   br i1 %cmp41, label %42, label %41
   %val42 = load volatile float, float* undef
-  %cmp42 = fcmp une float %val42, undef
+  %cmp42 = fcmp une float %val42, 0.0
   br i1 %cmp42, label %43, label %42
   %val43 = load volatile float, float* undef
-  %cmp43 = fcmp une float %val43, undef
+  %cmp43 = fcmp une float %val43, 0.0
   br i1 %cmp43, label %44, label %43
   %val44 = load volatile float, float* undef
-  %cmp44 = fcmp une float %val44, undef
+  %cmp44 = fcmp une float %val44, 0.0
   br i1 %cmp44, label %45, label %44
   %val45 = load volatile float, float* undef
-  %cmp45 = fcmp une float %val45, undef
+  %cmp45 = fcmp une float %val45, 0.0
   br i1 %cmp45, label %46, label %45
   %val46 = load volatile float, float* undef
-  %cmp46 = fcmp une float %val46, undef
+  %cmp46 = fcmp une float %val46, 0.0
   br i1 %cmp46, label %47, label %46
   %val47 = load volatile float, float* undef
-  %cmp47 = fcmp une float %val47, undef
+  %cmp47 = fcmp une float %val47, 0.0
   br i1 %cmp47, label %48, label %47
   %val48 = load volatile float, float* undef
-  %cmp48 = fcmp une float %val48, undef
+  %cmp48 = fcmp une float %val48, 0.0
   br i1 %cmp48, label %49, label %48
   %val49 = load volatile float, float* undef
-  %cmp49 = fcmp une float %val49, undef
+  %cmp49 = fcmp une float %val49, 0.0
   br i1 %cmp49, label %50, label %49
   %val50 = load volatile float, float* undef
-  %cmp50 = fcmp une float %val50, undef
+  %cmp50 = fcmp une float %val50, 0.0
   br i1 %cmp50, label %51, label %50
   %val51 = load volatile float, float* undef
-  %cmp51 = fcmp une float %val51, undef
+  %cmp51 = fcmp une float %val51, 0.0
   br i1 %cmp51, label %52, label %51
   %val52 = load volatile float, float* undef
-  %cmp52 = fcmp une float %val52, undef
+  %cmp52 = fcmp une float %val52, 0.0
   br i1 %cmp52, label %53, label %52
   %val53 = load volatile float, float* undef
-  %cmp53 = fcmp une float %val53, undef
+  %cmp53 = fcmp une float %val53, 0.0
   br i1 %cmp53, label %54, label %53
   %val54 = load volatile float, float* undef
-  %cmp54 = fcmp une float %val54, undef
+  %cmp54 = fcmp une float %val54, 0.0
   br i1 %cmp54, label %55, label %54
   %val55 = load volatile float, float* undef
-  %cmp55 = fcmp une float %val55, undef
+  %cmp55 = fcmp une float %val55, 0.0
   br i1 %cmp55, label %56, label %55
   %val56 = load volatile float, float* undef
-  %cmp56 = fcmp une float %val56, undef
+  %cmp56 = fcmp une float %val56, 0.0
   br i1 %cmp56, label %57, label %56
   %val57 = load volatile float, float* undef
-  %cmp57 = fcmp une float %val57, undef
+  %cmp57 = fcmp une float %val57, 0.0
   br i1 %cmp57, label %58, label %57
   %val58 = load volatile float, float* undef
-  %cmp58 = fcmp une float %val58, undef
+  %cmp58 = fcmp une float %val58, 0.0
   br i1 %cmp58, label %59, label %58
   %val59 = load volatile float, float* undef
-  %cmp59 = fcmp une float %val59, undef
+  %cmp59 = fcmp une float %val59, 0.0
   br i1 %cmp59, label %60, label %59
   %val60 = load volatile float, float* undef
-  %cmp60 = fcmp une float %val60, undef
+  %cmp60 = fcmp une float %val60, 0.0
   br i1 %cmp60, label %61, label %60
   %val61 = load volatile float, float* undef
-  %cmp61 = fcmp une float %val61, undef
+  %cmp61 = fcmp une float %val61, 0.0
   br i1 %cmp61, label %62, label %61
   %val62 = load volatile float, float* undef
-  %cmp62 = fcmp une float %val62, undef
+  %cmp62 = fcmp une float %val62, 0.0
   br i1 %cmp62, label %63, label %62
   %val63 = load volatile float, float* undef
-  %cmp63 = fcmp une float %val63, undef
+  %cmp63 = fcmp une float %val63, 0.0
   br i1 %cmp63, label %64, label %63
   %val64 = load volatile float, float* undef
-  %cmp64 = fcmp une float %val64, undef
+  %cmp64 = fcmp une float %val64, 0.0
   br i1 %cmp64, label %65, label %64
 
   br label %exit
Index: llvm/trunk/test/CodeGen/X86/fcmp-constant.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/fcmp-constant.ll
+++ llvm/trunk/test/CodeGen/X86/fcmp-constant.ll
@@ -18,7 +18,7 @@
 define <2 x i64> @fcmp_oeq_v2f64_undef() {
 ; CHECK-LABEL: fcmp_oeq_v2f64_undef:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: cmpeqpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %1 = fcmp oeq <2 x double> , undef
   %2 = sext <2 x i1> %1 to <2 x i64>
@@ -28,8 +28,7 @@
 define <2 x i64> @fcmp_oeq_v2f64_undef_elt() {
 ; CHECK-LABEL: fcmp_oeq_v2f64_undef_elt:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movapd {{.*#+}} xmm0 = 
-; CHECK-NEXT: cmpeqpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %1 = fcmp oeq <2 x double> , 
   %2 = sext <2 x i1> %1 to <2 x i64>
@@ -49,7 +48,7 @@
 define <4 x i32> @fcmp_oeq_v4f32_undef() {
 ; CHECK-LABEL: fcmp_oeq_v4f32_undef:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: cmpeqps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %1 = fcmp oeq <4 x float> , undef
   %2 = sext <4 x i1> %1 to <4 x i32>
@@ -59,8 +58,7 @@
 define <4 x i32> @fcmp_oeq_v4f32_undef_elt() {
 ; CHECK-LABEL: fcmp_oeq_v4f32_undef_elt:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = 
-; CHECK-NEXT: cmpeqps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,4294967295,4294967295,0]
 ; CHECK-NEXT: retq
   %1 = fcmp oeq <4 x float> , 
   %2 = sext <4 x i1> %1 to <4 x i32>
@@ -84,11 +82,7 @@
 define <2 x i64> @fcmp_ueq_v2f64_undef() {
 ; CHECK-LABEL: fcmp_ueq_v2f64_undef:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,-1.7976931348623157E+308]
-; CHECK-NEXT: movapd %xmm0, %xmm1
-; CHECK-NEXT: cmpeqpd %xmm0, %xmm1
-; CHECK-NEXT: cmpunordpd %xmm0, %xmm0
-; CHECK-NEXT: orpd %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %1 = fcmp ueq <2 x double> , undef
   %2 = sext <2 x i1> %1 to <2 x i64>
@@ -98,12 +92,8 @@
 define <2 x i64> @fcmp_ueq_v2f64_undef_elt() {
 ; CHECK-LABEL: fcmp_ueq_v2f64_undef_elt:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movapd {{.*#+}} xmm1 = 
-; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,-1.7976931348623157E+308]
-; CHECK-NEXT: movapd %xmm0, %xmm2
-; CHECK-NEXT: cmpeqpd %xmm1, %xmm2
-; CHECK-NEXT: cmpunordpd %xmm1, %xmm0
-; CHECK-NEXT: orpd %xmm2, %xmm0
+; CHECK-NEXT: movq $-1, %rax
+; CHECK-NEXT: movq %rax, %xmm0
 ; CHECK-NEXT: retq
   %1 = fcmp ueq <2 x double> , 
   %2 = sext <2 x i1> %1 to <2 x i64>
@@ -123,11 +113,7 @@
 define <4 x i32> @fcmp_ueq_v4f32_undef() {
 ; CHECK-LABEL: fcmp_ueq_v4f32_undef:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.0E+0,-1.0E+0,2.0E+0,-0.0E+0]
-; CHECK-NEXT: movaps %xmm0, %xmm1
-; CHECK-NEXT: cmpeqps %xmm0, %xmm1
-; CHECK-NEXT: cmpunordps %xmm0, %xmm0
-; CHECK-NEXT: orps %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %1 = fcmp ueq <4 x float> , undef
   %2 = sext <4 x i1> %1 to <4 x i32>
@@ -137,12 +123,7 @@
 define <4 x i32> @fcmp_ueq_v4f32_undef_elt() {
 ; CHECK-LABEL: fcmp_ueq_v4f32_undef_elt:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = 
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = <-0.0E+0,1.0E+0,-1.0E+0,u>
-; CHECK-NEXT: movaps %xmm0, %xmm2
-; CHECK-NEXT: cmpeqps %xmm1, %xmm2
-; CHECK-NEXT: cmpunordps %xmm1, %xmm0
-; CHECK-NEXT: orps %xmm2, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %1 = fcmp ueq <4 x float> , 
   %2 = sext <4 x i1> %1 to <4 x i32>
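
A note on the fold itself: when an fcmp operand is undef, SelectionDAG's setcc constant folding now picks NaN as the value of the undef, so every ordered predicate folds to false and every unordered predicate folds to true, matching llvm::ConstantFoldCompareInstruction. A minimal hand-written IR sketch (illustrative only, not part of the patch; the function names are invented):

define i1 @fold_ordered_undef() {
  %c = fcmp ogt double 1.0, undef   ; undef is treated as NaN, so this folds to false
  ret i1 %c
}

define i1 @fold_unordered_undef() {
  %c = fcmp ugt double 1.0, undef   ; undef is treated as NaN, so this folds to true
  ret i1 %c
}

The same rule explains the fcmp-constant.ll changes above: the oeq cases now fold to an all-zeros vector (xorps) and the ueq cases to an all-ones vector (pcmpeqd), with no compare emitted.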