Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -101,6 +101,7 @@
 
       VMOVRRD,      // double to two gprs.
       VMOVDRR,      // Two gprs to double.
+      VMOVSR,       // Move gpr to single; used for f32 literals built in a gpr.
 
       EH_SJLJ_SETJMP,         // SjLj exception handling setjmp.
       EH_SJLJ_LONGJMP,        // SjLj exception handling longjmp.
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -1275,6 +1275,7 @@
 
   case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
   case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
+  case ARMISD::VMOVSR:        return "ARMISD::VMOVSR";
 
   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
   case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
@@ -4369,9 +4370,10 @@
   bool InvalidOnQNaN;
   FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
 
-  // Try to generate VMAXNM/VMINNM on ARMv8.
-  if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
-                                  TrueVal.getValueType() == MVT::f64)) {
+  // Try to generate VMAXNM/VMINNM on ARMv8, except when comparing to zero.
+  // That case is skipped here so that CMPFPw0 is used instead of CMPFP.
+  if (Subtarget->hasFPARMv8() && !isFloatingPointZero(RHS) &&
+    (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) {
     bool swpCmpOps = false;
     bool swpVselOps = false;
     checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
@@ -5660,7 +5662,8 @@
 
 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
                                            const ARMSubtarget *ST) const {
-  bool IsDouble = Op.getValueType() == MVT::f64;
+  EVT VT = Op.getValueType();
+  bool IsDouble = (VT == MVT::f64);
   ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
   const APFloat &FPVal = CFP->getValueAPF();
 
@@ -5676,7 +5679,13 @@
         std::swap(Lo, Hi);
       return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
     } else {
-      return DAG.getConstant(INTVal, DL, MVT::i32);
+      assert(VT == MVT::f32);
+      // Leave the constant alone if the target can encode it as an immediate.
+      // Otherwise, use an integer constant that is moved into an FP register.
+      if (isFPImmLegal(FPVal, VT))
+        return Op;
+      return DAG.getNode(ARMISD::VMOVSR, DL, VT,
+          DAG.getConstant(INTVal, DL, MVT::i32));
     }
   }
 
Index: lib/Target/ARM/ARMInstrVFP.td
===================================================================
--- lib/Target/ARM/ARMInstrVFP.td
+++ lib/Target/ARM/ARMInstrVFP.td
@@ -17,11 +17,14 @@
 def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
                                        SDTCisVT<2, f64>]>;
 
+def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;
+
 def arm_fmstat : SDNode<"ARMISD::FMSTAT",  SDTNone, [SDNPInGlue, SDNPOutGlue]>;
 def arm_cmpfp  : SDNode<"ARMISD::CMPFP",   SDT_ARMFCmp, [SDNPOutGlue]>;
 def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
 def arm_fmdrr  : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
 def arm_fmrrd  : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
+def arm_vmovsr : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>;
 
 //===----------------------------------------------------------------------===//
 // Operand Definitions.
@@ -1052,6 +1055,7 @@
   // pipelines.
   let D = VFPNeonDomain;
 }
+def : Pat<(arm_vmovsr GPR:$Rt), (VMOVSR GPR:$Rt)>;
 
 let hasSideEffects = 0 in {
 def VMOVRRD  : AVConv3I<0b11000101, 0b1011,
Index: test/CodeGen/ARM/fcmp-xo.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/fcmp-xo.ll
@@ -0,0 +1,72 @@
+; RUN: llc -mtriple=thumbv7m-arm-none-eabi -mattr=+execute-only,+vfp4 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv7m-arm-none-eabi -mattr=+execute-only,+fp-armv8 %s -o - | FileCheck %s
+
+; Regression test: on fp-armv8 this used to fail instruction selection because
+; the fcmp constant operands were ordered differently. A compare against 0.0
+; is expected to select the compare-with-zero form (vcmpe.f32 sN, #0).
+define arm_aapcs_vfpcc void @foo0() local_unnamed_addr {
+  br i1 undef, label %.end, label %1
+
+  %2 = fcmp nsz olt float undef, 0.000000e+00
+  %3 = select i1 %2, float -5.000000e-01, float 5.000000e-01
+  %4 = fadd nsz float undef, %3
+  %5 = fptosi float %4 to i32
+  %6 = ashr i32 %5, 4
+  %7 = icmp slt i32 %6, 0
+  br i1 %7, label %8, label %.end
+
+  tail call arm_aapcs_vfpcc void @bar()
+  br label %.end
+
+.end:
+  ret void
+}
+; CHECK-LABEL: foo0:
+; CHECK: vcmpe.f32 {{s[0-9]+}}, #0
+
+
+define arm_aapcs_vfpcc void @foo1() local_unnamed_addr {
+  br i1 undef, label %.end, label %1
+
+  %2 = fcmp nsz olt float undef, 1.000000e+00 ; fits a vmov.f32 immediate
+  %3 = select i1 %2, float -5.000000e-01, float 5.000000e-01
+  %4 = fadd nsz float undef, %3
+  %5 = fptosi float %4 to i32
+  %6 = ashr i32 %5, 4
+  %7 = icmp slt i32 %6, 0
+  br i1 %7, label %8, label %.end
+
+  tail call arm_aapcs_vfpcc void @bar()
+  br label %.end
+
+.end:
+  ret void
+}
+; CHECK-LABEL: foo1:
+; CHECK: vmov.f32 [[FPREG:s[0-9]+]], #1.000000e+00
+; CHECK: vcmpe.f32 {{s[0-9]+}}, [[FPREG]]
+
+define arm_aapcs_vfpcc void @foo128() local_unnamed_addr {
+  br i1 undef, label %.end, label %1
+
+  %2 = fcmp nsz olt float undef, 128.000000e+00 ; 0x43000000 = 1124073472
+  %3 = select i1 %2, float -5.000000e-01, float 5.000000e-01
+  %4 = fadd nsz float undef, %3
+  %5 = fptosi float %4 to i32
+  %6 = ashr i32 %5, 4
+  %7 = icmp slt i32 %6, 0
+  br i1 %7, label %8, label %.end
+
+  tail call arm_aapcs_vfpcc void @bar()
+  br label %.end
+
+.end:
+  ret void
+}
+; CHECK-LABEL: foo128:
+; CHECK: mov.w [[REG:r[0-9]+]], #1124073472
+; CHECK: vmov [[FPREG:s[0-9]+]], [[REG]]
+; CHECK: vcmpe.f32 {{s[0-9]+}}, [[FPREG]]
+
+declare arm_aapcs_vfpcc void @bar() local_unnamed_addr
+