diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -659,10 +659,9 @@ /// This instruction implements SINT_TO_FP with the /// integer source in memory and FP reg result. This corresponds to the /// X86::FILD*m instructions. It has two inputs (token chain and address) - /// and two outputs (FP value and token chain). FILD_FLAG also produces a - /// flag). The integer source type is specified by the memory VT. + /// and two outputs (FP value and token chain). The integer source type is + /// specified by the memory VT. FILD, - FILD_FLAG, /// This instruction implements a fp->int store from FP stack /// slots. This corresponds to the fist instruction. It takes a diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18903,7 +18903,7 @@ SDVTList Tys; bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); if (useSSE) - Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue); + Tys = DAG.getVTList(MVT::f80, MVT::Other); else Tys = DAG.getVTList(Op.getValueType(), MVT::Other); @@ -18922,23 +18922,18 @@ } SDValue FILDOps[] = {Chain, StackSlot}; SDValue Result = - DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, DL, + DAG.getMemIntrinsicNode(X86ISD::FILD, DL, Tys, FILDOps, SrcVT, LoadMMO); Chain = Result.getValue(1); if (useSSE) { - SDValue InFlag = Result.getValue(2); - - // FIXME: Currently the FST is glued to the FILD_FLAG. This - // shouldn't be necessary except that RFP cannot be live across - // multiple blocks. When stackifier is fixed, they can be uncoupled. MachineFunction &MF = DAG.getMachineFunction(); unsigned SSFISize = Op.getValueSizeInBits() / 8; int SSFI = MF.getFrameInfo().CreateStackObject(SSFISize, SSFISize, false); auto PtrVT = getPointerTy(MF.getDataLayout()); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); Tys = DAG.getVTList(MVT::Other); - SDValue FSTOps[] = {Chain, Result, StackSlot, InFlag}; + SDValue FSTOps[] = {Chain, Result, StackSlot}; MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), MachineMemOperand::MOStore, SSFISize, SSFISize); @@ -29448,14 +29443,12 @@ if (Subtarget.hasX87()) { // First load this into an 80-bit X87 register. This will put the whole // integer into the significand. - // FIXME: Do we need to glue? See FIXME comment in BuildFILD. - SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue); + SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; - SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG, + SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, Node->getMemOperand()); SDValue Chain = Result.getValue(1); - SDValue InFlag = Result.getValue(2); // Now store the X87 register to a stack temporary and convert to i64. // This store is not atomic and doesn't need to be. @@ -29465,7 +29458,7 @@ int SPFI = cast(StackPtr.getNode())->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); - SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag }; + SDValue StoreOps[] = { Chain, Result, StackPtr }; Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64, MPI, 0 /*Align*/, @@ -29647,7 +29640,6 @@ case X86ISD::FOR: return "X86ISD::FOR"; case X86ISD::FXOR: return "X86ISD::FXOR"; case X86ISD::FILD: return "X86ISD::FILD"; - case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; case X86ISD::FIST: return "X86ISD::FIST"; case X86ISD::FP_TO_INT_IN_MEM: return "X86ISD::FP_TO_INT_IN_MEM"; case X86ISD::FLD: return "X86ISD::FLD"; diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -29,16 +29,11 @@ def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86fst : SDNode<"X86ISD::FST", SDTX86Fst, - [SDNPHasChain, SDNPOptInGlue, SDNPMayStore, - SDNPMemOperand]>; + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; -def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild, - [SDNPHasChain, SDNPOutGlue, SDNPMayLoad, - SDNPMemOperand]>; def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist, - [SDNPHasChain, SDNPOptInGlue, SDNPMayStore, - SDNPMemOperand]>; + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>; def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; @@ -79,10 +74,6 @@ return cast(N)->getMemoryVT() == MVT::i64; }]>; -def X86fildflag64 : PatFrag<(ops node:$ptr), (X86fildflag node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i64; -}]>; - def X86fist64 : PatFrag<(ops node:$val, node:$ptr), (X86fist node:$val, node:$ptr), [{ return cast(N)->getMemoryVT() == MVT::i64; @@ -800,11 +791,7 @@ def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>; def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>; -// Used to conv. i64 to f64 since there isn't a SSE version. -def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m64 addr:$src)>; - // Used to conv. between f80 and i64 for i64 atomic loads. -def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m80 addr:$src)>; def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>; // FP extensions map onto simple pseudo-value conversions if they are to/from diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll @@ -651,14 +651,13 @@ ; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) -; AVX-32-NEXT: wait -; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fstpl (%esp) ; AVX-32-NEXT: wait +; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; AVX-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; AVX-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll @@ -290,29 +290,26 @@ ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; NODQ-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstpl (%esp) -; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] -; NODQ-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) -; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: wait +; NODQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; NODQ-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; NODQ-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero ; NODQ-32-NEXT: vmovhps {{.*#+}} xmm2 = xmm2[0,1],mem[0,1] ; NODQ-32-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 @@ -426,11 +423,6 @@ ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) -; NODQ-32-NEXT: wait -; NODQ-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] -; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] -; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) @@ -440,6 +432,10 @@ ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstps (%esp) ; NODQ-32-NEXT: wait +; NODQ-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] +; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] ; NODQ-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]