Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -101,6 +101,7 @@
 
       VMOVRRD,      // double to two gprs.
       VMOVDRR,      // Two gprs to double.
+      VMOVSR,       // move gpr to single, used for f32 literal constructed in a gpr
 
       EH_SJLJ_SETJMP,         // SjLj exception handling setjmp.
       EH_SJLJ_LONGJMP,        // SjLj exception handling longjmp.
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -1275,6 +1275,7 @@
 
   case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
   case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
+  case ARMISD::VMOVSR:        return "ARMISD::VMOVSR";
 
   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
   case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
@@ -4369,9 +4370,10 @@
   bool InvalidOnQNaN;
   FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
 
-  // Try to generate VMAXNM/VMINNM on ARMv8.
-  if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
-                                  TrueVal.getValueType() == MVT::f64)) {
+  // Try to generate VMAXNM/VMINNM on ARMv8, except when comparing against zero.
+  // This ensures we use CMPFPw0 instead of CMPFP in that case.
+  if (Subtarget->hasFPARMv8() && !isFloatingPointZero(RHS) &&
+      (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) {
     bool swpCmpOps = false;
     bool swpVselOps = false;
     checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
@@ -5660,7 +5662,8 @@
 
 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
                                            const ARMSubtarget *ST) const {
-  bool IsDouble = Op.getValueType() == MVT::f64;
+  EVT VT = Op.getValueType();
+  bool IsDouble = (VT == MVT::f64);
   ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
   const APFloat &FPVal = CFP->getValueAPF();
 
@@ -5676,7 +5679,13 @@
         std::swap(Lo, Hi);
       return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
     } else {
-      return DAG.getConstant(INTVal, DL, MVT::i32);
+      assert(VT == MVT::f32 && "Unexpected FP constant type");
+      // Leave the constant if the target can address it as an immediate.
+      // Otherwise, use an integer constant that is moved into an fp-reg.
+      if (isFPImmLegal(FPVal, VT))
+        return Op;
+      return DAG.getNode(ARMISD::VMOVSR, DL, VT,
+                         DAG.getConstant(INTVal, DL, MVT::i32));
     }
   }
 
Index: lib/Target/ARM/ARMInstrVFP.td
===================================================================
--- lib/Target/ARM/ARMInstrVFP.td
+++ lib/Target/ARM/ARMInstrVFP.td
@@ -17,11 +17,14 @@
 def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
                                        SDTCisVT<2, f64>]>;
 
+def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;
+
 def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
 def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMFCmp, [SDNPOutGlue]>;
 def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
 def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
 def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
+def arm_vmovsr : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>;
 
 //===----------------------------------------------------------------------===//
 // Operand Definitions.
@@ -1052,6 +1055,7 @@
   // pipelines.
   let D = VFPNeonDomain;
 }
+def : Pat<(arm_vmovsr GPR:$Rt), (VMOVSR GPR:$Rt)>;
 
 let hasSideEffects = 0 in {
 def VMOVRRD : AVConv3I<0b11000101, 0b1011,
Index: test/CodeGen/ARM/fcmp-xo.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/fcmp-xo.ll
@@ -0,0 +1,72 @@
+; RUN: llc -mtriple=thumbv7m-arm-none-eabi -mattr=+execute-only,+vfp4 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv7m-arm-none-eabi -mattr=+execute-only,+fp-armv8 %s -o - | FileCheck %s
+
+; This function used to run into a code selection error on fp-armv8 due to
+; different ordering of the constant arguments of fcmp. Fixed by extending the
+; code selection to handle the missing case.
+define arm_aapcs_vfpcc void @foo0() local_unnamed_addr {
+  br i1 undef, label %.end, label %1
+
+  %2 = fcmp nsz olt float undef, 0.000000e+00
+  %3 = select i1 %2, float -5.000000e-01, float 5.000000e-01
+  %4 = fadd nsz float undef, %3
+  %5 = fptosi float %4 to i32
+  %6 = ashr i32 %5, 4
+  %7 = icmp slt i32 %6, 0
+  br i1 %7, label %8, label %.end
+
+  tail call arm_aapcs_vfpcc void @bar()
+  br label %.end
+
+.end:
+  ret void
+}
+; CHECK-LABEL: foo0
+; CHECK: vcmpe.f32 {{s[0-9]+}}, #0
+
+
+define arm_aapcs_vfpcc void @foo1() local_unnamed_addr {
+  br i1 undef, label %.end, label %1
+
+  %2 = fcmp nsz olt float undef, 1.000000e+00
+  %3 = select i1 %2, float -5.000000e-01, float 5.000000e-01
+  %4 = fadd nsz float undef, %3
+  %5 = fptosi float %4 to i32
+  %6 = ashr i32 %5, 4
+  %7 = icmp slt i32 %6, 0
+  br i1 %7, label %8, label %.end
+
+  tail call arm_aapcs_vfpcc void @bar()
+  br label %.end
+
+.end:
+  ret void
+}
+; CHECK-LABEL: foo1
+; CHECK: vmov.f32 [[FPREG:s[0-9]+]], #1.000000e+00
+; CHECK: vcmpe.f32 {{s[0-9]+}}, [[FPREG]]
+
+define arm_aapcs_vfpcc void @foo128() local_unnamed_addr {
+  br i1 undef, label %.end, label %1
+
+  %2 = fcmp nsz olt float undef, 128.000000e+00
+  %3 = select i1 %2, float -5.000000e-01, float 5.000000e-01
+  %4 = fadd nsz float undef, %3
+  %5 = fptosi float %4 to i32
+  %6 = ashr i32 %5, 4
+  %7 = icmp slt i32 %6, 0
+  br i1 %7, label %8, label %.end
+
+  tail call arm_aapcs_vfpcc void @bar()
+  br label %.end
+
+.end:
+  ret void
+}
+; CHECK-LABEL: foo128
+; CHECK: mov.w [[REG:r[0-9]+]], #1124073472
+; CHECK: vmov [[FPREG:s[0-9]+]], [[REG]]
+; CHECK: vcmpe.f32 {{s[0-9]+}}, [[FPREG]]
+
+declare arm_aapcs_vfpcc void @bar() local_unnamed_addr
+