diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -659,10 +659,9 @@ /// This instruction implements SINT_TO_FP with the /// integer source in memory and FP reg result. This corresponds to the /// X86::FILD*m instructions. It has two inputs (token chain and address) - /// and two outputs (FP value and token chain). FILD_FLAG also produces a - /// flag). The integer source type is specified by the memory VT. + /// and two outputs (FP value and token chain). The integer source type is + /// specified by the memory VT. FILD, - FILD_FLAG, /// This instruction implements a fp->int store from FP stack /// slots. This corresponds to the fist instruction. It takes a diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18903,7 +18903,7 @@ SDVTList Tys; bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); if (useSSE) - Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue); + Tys = DAG.getVTList(MVT::f80, MVT::Other); else Tys = DAG.getVTList(Op.getValueType(), MVT::Other); @@ -18922,23 +18922,18 @@ } SDValue FILDOps[] = {Chain, StackSlot}; SDValue Result = - DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, DL, + DAG.getMemIntrinsicNode(X86ISD::FILD, DL, Tys, FILDOps, SrcVT, LoadMMO); Chain = Result.getValue(1); if (useSSE) { - SDValue InFlag = Result.getValue(2); - - // FIXME: Currently the FST is glued to the FILD_FLAG. This - // shouldn't be necessary except that RFP cannot be live across - // multiple blocks. When stackifier is fixed, they can be uncoupled. 
MachineFunction &MF = DAG.getMachineFunction(); unsigned SSFISize = Op.getValueSizeInBits() / 8; int SSFI = MF.getFrameInfo().CreateStackObject(SSFISize, SSFISize, false); auto PtrVT = getPointerTy(MF.getDataLayout()); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); Tys = DAG.getVTList(MVT::Other); - SDValue FSTOps[] = {Chain, Result, StackSlot, InFlag}; + SDValue FSTOps[] = {Chain, Result, StackSlot}; MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), MachineMemOperand::MOStore, SSFISize, SSFISize); @@ -29448,14 +29443,12 @@ if (Subtarget.hasX87()) { // First load this into an 80-bit X87 register. This will put the whole // integer into the significand. - // FIXME: Do we need to glue? See FIXME comment in BuildFILD. - SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue); + SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; - SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG, + SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, Node->getMemOperand()); SDValue Chain = Result.getValue(1); - SDValue InFlag = Result.getValue(2); // Now store the X87 register to a stack temporary and convert to i64. // This store is not atomic and doesn't need to be. 
@@ -29465,7 +29458,7 @@ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); - SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag }; + SDValue StoreOps[] = { Chain, Result, StackPtr }; Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64, MPI, 0 /*Align*/, @@ -29647,7 +29640,6 @@ case X86ISD::FOR: return "X86ISD::FOR"; case X86ISD::FXOR: return "X86ISD::FXOR"; case X86ISD::FILD: return "X86ISD::FILD"; - case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; case X86ISD::FIST: return "X86ISD::FIST"; case X86ISD::FP_TO_INT_IN_MEM: return "X86ISD::FP_TO_INT_IN_MEM"; case X86ISD::FLD: return "X86ISD::FLD"; diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -29,16 +29,11 @@ def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86fst : SDNode<"X86ISD::FST", SDTX86Fst, - [SDNPHasChain, SDNPOptInGlue, SDNPMayStore, - SDNPMemOperand]>; + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; -def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild, - [SDNPHasChain, SDNPOutGlue, SDNPMayLoad, - SDNPMemOperand]>; def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist, - [SDNPHasChain, SDNPOptInGlue, SDNPMayStore, - SDNPMemOperand]>; + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>; def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; @@ -79,10 +74,6 @@ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; }]>; -def X86fildflag64 : PatFrag<(ops node:$ptr), (X86fildflag node:$ptr), [{ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; -}]>; - def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
(X86fist node:$val, node:$ptr), [{ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; @@ -800,11 +791,7 @@ def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>; def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>; -// Used to conv. i64 to f64 since there isn't a SSE version. -def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m64 addr:$src)>; - // Used to conv. between f80 and i64 for i64 atomic loads. -def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m80 addr:$src)>; def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>; // FP extensions map onto simple pseudo-value conversions if they are to/from diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll @@ -640,25 +640,24 @@ ; AVX-32-NEXT: .cfi_def_cfa_register %ebp ; AVX-32-NEXT: andl $-8, %esp ; AVX-32-NEXT: subl $64, %esp -; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; AVX-32-NEXT: vmovlps %xmm0, 56(%esp) ; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; AVX-32-NEXT: vmovlps %xmm1, 40(%esp) ; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; AVX-32-NEXT: vmovlps %xmm0, 24(%esp) ; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] -; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) -; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) -; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) +; AVX-32-NEXT: vmovlps %xmm0, 8(%esp) +; AVX-32-NEXT: fildll 56(%esp) +; AVX-32-NEXT: fstpl 48(%esp) +; AVX-32-NEXT: fildll 40(%esp) +; AVX-32-NEXT: fstpl 32(%esp) +; AVX-32-NEXT: fildll 24(%esp) +; AVX-32-NEXT: fstpl 16(%esp) +; AVX-32-NEXT: fildll 8(%esp) +; AVX-32-NEXT: fstpl (%esp) ; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] -; 
AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) -; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX-32-NEXT: fstpl (%esp) -; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; AVX-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; AVX-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll @@ -272,47 +272,44 @@ ; NODQ-32-NEXT: andl $-8, %esp ; NODQ-32-NEXT: subl $128, %esp ; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1 -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 56(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 40(%esp) ; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm1 -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 24(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) -; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 8(%esp) +; NODQ-32-NEXT: vmovlps %xmm0, 120(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 104(%esp) ; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm0 -; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm0, 88(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm0, 72(%esp) +; NODQ-32-NEXT: fildll 56(%esp) +; NODQ-32-NEXT: fstpl 48(%esp) +; NODQ-32-NEXT: fildll 40(%esp) +; NODQ-32-NEXT: fstpl 
32(%esp) +; NODQ-32-NEXT: fildll 24(%esp) +; NODQ-32-NEXT: fstpl 16(%esp) +; NODQ-32-NEXT: fildll 8(%esp) +; NODQ-32-NEXT: fstpl (%esp) +; NODQ-32-NEXT: fildll 120(%esp) +; NODQ-32-NEXT: fstpl 112(%esp) +; NODQ-32-NEXT: fildll 104(%esp) +; NODQ-32-NEXT: fstpl 96(%esp) +; NODQ-32-NEXT: fildll 88(%esp) +; NODQ-32-NEXT: fstpl 80(%esp) +; NODQ-32-NEXT: fildll 72(%esp) +; NODQ-32-NEXT: fstpl 64(%esp) ; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; NODQ-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl (%esp) -; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; NODQ-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero ; NODQ-32-NEXT: vmovhps {{.*#+}} xmm2 = xmm2[0,1],mem[0,1] ; NODQ-32-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 @@ -403,43 +400,42 @@ ; NODQ-32-NEXT: .cfi_def_cfa_register %ebp ; NODQ-32-NEXT: andl $-8, %esp ; NODQ-32-NEXT: subl $96, %esp -; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm0, 88(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 80(%esp) ; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm1 -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 
72(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 64(%esp) ; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1 -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 56(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm1, 48(%esp) ; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm0, 40(%esp) ; NODQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] -; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm0, 32(%esp) +; NODQ-32-NEXT: fildll 88(%esp) +; NODQ-32-NEXT: fstps 28(%esp) +; NODQ-32-NEXT: fildll 80(%esp) +; NODQ-32-NEXT: fstps 24(%esp) +; NODQ-32-NEXT: fildll 72(%esp) +; NODQ-32-NEXT: fstps 20(%esp) +; NODQ-32-NEXT: fildll 64(%esp) +; NODQ-32-NEXT: fstps 16(%esp) +; NODQ-32-NEXT: fildll 56(%esp) +; NODQ-32-NEXT: fstps 12(%esp) +; NODQ-32-NEXT: fildll 48(%esp) +; NODQ-32-NEXT: fstps 8(%esp) +; NODQ-32-NEXT: fildll 40(%esp) +; NODQ-32-NEXT: fstps 4(%esp) +; NODQ-32-NEXT: fildll 32(%esp) +; NODQ-32-NEXT: fstps (%esp) ; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) 
-; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) -; NODQ-32-NEXT: fstps (%esp) -; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]