diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -659,10 +659,9 @@ /// This instruction implements SINT_TO_FP with the /// integer source in memory and FP reg result. This corresponds to the /// X86::FILD*m instructions. It has two inputs (token chain and address) - /// and two outputs (FP value and token chain). FILD_FLAG also produces a - /// flag). The integer source type is specified by the memory VT. + /// and two outputs (FP value and token chain). The integer source type is + /// specified by the memory VT. FILD, - FILD_FLAG, /// This instruction implements a fp->int store from FP stack /// slots. This corresponds to the fist instruction. It takes a diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18903,7 +18903,7 @@ SDVTList Tys; bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); if (useSSE) - Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue); + Tys = DAG.getVTList(MVT::f80, MVT::Other); else Tys = DAG.getVTList(Op.getValueType(), MVT::Other); @@ -18922,23 +18922,18 @@ } SDValue FILDOps[] = {Chain, StackSlot}; SDValue Result = - DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, DL, + DAG.getMemIntrinsicNode(X86ISD::FILD, DL, Tys, FILDOps, SrcVT, LoadMMO); Chain = Result.getValue(1); if (useSSE) { - SDValue InFlag = Result.getValue(2); - - // FIXME: Currently the FST is glued to the FILD_FLAG. This - // shouldn't be necessary except that RFP cannot be live across - // multiple blocks. When stackifier is fixed, they can be uncoupled. 
MachineFunction &MF = DAG.getMachineFunction(); unsigned SSFISize = Op.getValueSizeInBits() / 8; int SSFI = MF.getFrameInfo().CreateStackObject(SSFISize, SSFISize, false); auto PtrVT = getPointerTy(MF.getDataLayout()); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); Tys = DAG.getVTList(MVT::Other); - SDValue FSTOps[] = {Chain, Result, StackSlot, InFlag}; + SDValue FSTOps[] = {Chain, Result, StackSlot}; MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), MachineMemOperand::MOStore, SSFISize, SSFISize); @@ -29448,14 +29443,12 @@ if (Subtarget.hasX87()) { // First load this into an 80-bit X87 register. This will put the whole // integer into the significand. - // FIXME: Do we need to glue? See FIXME comment in BuildFILD. - SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue); + SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; - SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG, + SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, Node->getMemOperand()); SDValue Chain = Result.getValue(1); - SDValue InFlag = Result.getValue(2); // Now store the X87 register to a stack temporary and convert to i64. // This store is not atomic and doesn't need to be. 
@@ -29465,7 +29458,7 @@ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); - SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag }; + SDValue StoreOps[] = { Chain, Result, StackPtr }; Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64, MPI, 0 /*Align*/, @@ -29647,7 +29640,6 @@ case X86ISD::FOR: return "X86ISD::FOR"; case X86ISD::FXOR: return "X86ISD::FXOR"; case X86ISD::FILD: return "X86ISD::FILD"; - case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; case X86ISD::FIST: return "X86ISD::FIST"; case X86ISD::FP_TO_INT_IN_MEM: return "X86ISD::FP_TO_INT_IN_MEM"; case X86ISD::FLD: return "X86ISD::FLD"; diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -29,16 +29,11 @@ def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86fst : SDNode<"X86ISD::FST", SDTX86Fst, - [SDNPHasChain, SDNPOptInGlue, SDNPMayStore, - SDNPMemOperand]>; + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; -def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild, - [SDNPHasChain, SDNPOutGlue, SDNPMayLoad, - SDNPMemOperand]>; def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist, - [SDNPHasChain, SDNPOptInGlue, SDNPMayStore, - SDNPMemOperand]>; + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>; def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; @@ -79,10 +74,6 @@ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; }]>; -def X86fildflag64 : PatFrag<(ops node:$ptr), (X86fildflag node:$ptr), [{ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; -}]>; - def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
(X86fist node:$val, node:$ptr), [{ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; @@ -800,11 +791,7 @@ def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>; def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>; -// Used to conv. i64 to f64 since there isn't a SSE version. -def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m64 addr:$src)>; - // Used to conv. between f80 and i64 for i64 atomic loads. -def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m80 addr:$src)>; def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>; // FP extensions map onto simple pseudo-value conversions if they are to/from diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll @@ -640,25 +640,24 @@ ; AVX-32-NEXT: .cfi_def_cfa_register %ebp ; AVX-32-NEXT: andl $-8, %esp ; AVX-32-NEXT: subl $64, %esp -; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; AVX-32-NEXT: vmovlps %xmm0, 56(%esp) ; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; AVX-32-NEXT: vmovlps %xmm1, 40(%esp) ; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; AVX-32-NEXT: vmovlps %xmm0, 24(%esp) ; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] -; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) -; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) -; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) +; AVX-32-NEXT: vmovlps %xmm0, 8(%esp) +; AVX-32-NEXT: fildll 56(%esp) +; AVX-32-NEXT: fstpl 48(%esp) +; AVX-32-NEXT: fildll 40(%esp) +; AVX-32-NEXT: fstpl 32(%esp) +; AVX-32-NEXT: fildll 24(%esp) +; AVX-32-NEXT: fstpl 16(%esp) +; AVX-32-NEXT: fildll 8(%esp) +; AVX-32-NEXT: fstpl (%esp) ; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] -; 
AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) -; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fstpl (%esp) -; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; AVX-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; AVX-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll @@ -272,47 +272,44 @@ ; NODQ-32-NEXT: andl $-8, %esp ; NODQ-32-NEXT: subl $128, %esp ; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1 -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 56(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 40(%esp) ; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm1 -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 24(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) -; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 8(%esp) +; NODQ-32-NEXT: vmovlps %xmm0, 120(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 104(%esp) ; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm0 -; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm0, 88(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm0, 72(%esp) +; NODQ-32-NEXT: fildll 56(%esp) +; NODQ-32-NEXT: fstpl 48(%esp) +; NODQ-32-NEXT: fildll 40(%esp) +; NODQ-32-NEXT: fstpl 
32(%esp) +; NODQ-32-NEXT: fildll 24(%esp) +; NODQ-32-NEXT: fstpl 16(%esp) +; NODQ-32-NEXT: fildll 8(%esp) +; NODQ-32-NEXT: fstpl (%esp) +; NODQ-32-NEXT: fildll 120(%esp) +; NODQ-32-NEXT: fstpl 112(%esp) +; NODQ-32-NEXT: fildll 104(%esp) +; NODQ-32-NEXT: fstpl 96(%esp) +; NODQ-32-NEXT: fildll 88(%esp) +; NODQ-32-NEXT: fstpl 80(%esp) +; NODQ-32-NEXT: fildll 72(%esp) +; NODQ-32-NEXT: fstpl 64(%esp) ; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; NODQ-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl (%esp) -; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; NODQ-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero ; NODQ-32-NEXT: vmovhps {{.*#+}} xmm2 = xmm2[0,1],mem[0,1] ; NODQ-32-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 @@ -403,43 +400,42 @@ ; NODQ-32-NEXT: .cfi_def_cfa_register %ebp ; NODQ-32-NEXT: andl $-8, %esp ; NODQ-32-NEXT: subl $96, %esp -; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm0, 88(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 80(%esp) ; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm1 -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 
72(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 64(%esp) ; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1 -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 56(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 48(%esp) ; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm0, 40(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm0, 32(%esp) +; NODQ-32-NEXT: fildll 88(%esp) +; NODQ-32-NEXT: fstps 28(%esp) +; NODQ-32-NEXT: fildll 80(%esp) +; NODQ-32-NEXT: fstps 24(%esp) +; NODQ-32-NEXT: fildll 72(%esp) +; NODQ-32-NEXT: fstps 20(%esp) +; NODQ-32-NEXT: fildll 64(%esp) +; NODQ-32-NEXT: fstps 16(%esp) +; NODQ-32-NEXT: fildll 56(%esp) +; NODQ-32-NEXT: fstps 12(%esp) +; NODQ-32-NEXT: fildll 48(%esp) +; NODQ-32-NEXT: fstps 8(%esp) +; NODQ-32-NEXT: fildll 40(%esp) +; NODQ-32-NEXT: fstps 4(%esp) +; NODQ-32-NEXT: fildll 32(%esp) +; NODQ-32-NEXT: fstps (%esp) ; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) 
-; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps (%esp) -; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]