diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -609,6 +609,7 @@ /// 1 Round to nearest /// 2 Round to +inf /// 3 Round to -inf + /// Result is rounding mode and chain. Input is a chain. FLT_ROUNDS_, /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type. diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -619,7 +619,7 @@ let TargetPrefix = "aarch64" in { class FPCR_Get_Intrinsic - : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; + : Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>; } // FPCR diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2823,6 +2823,7 @@ } case ISD::FLT_ROUNDS_: Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); break; case ISD::EH_RETURN: case ISD::EH_LABEL: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -563,7 +563,13 @@ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - return DAG.getNode(N->getOpcode(), dl, NVT); + SDValue Res = + DAG.getNode(N->getOpcode(), dl, {NVT, MVT::Other}, N->getOperand(0)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; } SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { @@ -2744,10 +2750,15 @@ unsigned NBitWidth = NVT.getSizeInBits(); EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT); + Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0)); + SDValue Chain = Lo.getValue(1); // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Chain); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6630,7 +6630,9 @@ case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::flt_rounds: - setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); + Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot()); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); return; case Intrinsic::expect: diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2869,16 +2869,19 @@ // so that the shift + and get folded into a bitfield extract. SDLoc dl(Op); - SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64, - DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, - MVT::i64)); + SDValue Chain = Op.getOperand(0); + SDValue FPCR_64 = DAG.getNode( + ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other}, + {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)}); + Chain = FPCR_64.getValue(1); SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64); SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32, DAG.getConstant(1U << 22, dl, MVT::i32)); SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, DAG.getConstant(22, dl, MVT::i32)); - return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, - DAG.getConstant(3, dl, MVT::i32)); + SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, + DAG.getConstant(3, dl, MVT::i32)); + return DAG.getMergeValues({AND, Chain}, dl); } static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -5948,16 +5948,20 @@ // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) // so that the shift + and get folded into a bitfield extract. SDLoc dl(Op); - SDValue Ops[] = { DAG.getEntryNode(), - DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) }; + SDValue Chain = Op.getOperand(0); + SDValue Ops[] = {Chain, + DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)}; - SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops); + SDValue FPSCR = + DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops); + Chain = FPSCR.getValue(1); SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, DAG.getConstant(1U << 22, dl, MVT::i32)); SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, DAG.getConstant(22, dl, MVT::i32)); - return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, - DAG.getConstant(3, dl, MVT::i32)); + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, + DAG.getConstant(3, dl, MVT::i32)); + return DAG.getMergeValues({And, Chain}, dl); } static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8306,22 +8306,20 @@ EVT PtrVT = getPointerTy(MF.getDataLayout()); // Save FP Control Word to register - EVT NodeTys[] = { - MVT::f64, // return register - MVT::Glue // unused in this context - }; - SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None); + SDValue Chain = Op.getOperand(0); + SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain); + Chain = MFFS.getValue(1); // Save FP register to stack slot int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, - MachinePointerInfo()); + Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo()); // Load FP Control Word from low 32 bits of stack slot. SDValue Four = DAG.getConstant(4, dl, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); - SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo()); + SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo()); + Chain = CWD.getValue(1); // Transform as necessary SDValue CWD1 = @@ -8338,8 +8336,11 @@ SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); - return DAG.getNode((VT.getSizeInBits() < 16 ? - ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); + RetVal = + DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND), + dl, VT, RetVal); + + return DAG.getMergeValues({RetVal, Chain}, dl); } SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -155,7 +155,8 @@ // Extract FPSCR (not modeled at the DAG level). def PPCmffs : SDNode<"PPCISD::MFFS", - SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>; + SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, + [SDNPHasChain]>; // Perform FADD in round-to-zero mode. def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25647,14 +25647,15 @@ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), MachineMemOperand::MOStore, 2, 2); - SDValue Ops[] = { DAG.getEntryNode(), StackSlot }; - SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, - DAG.getVTList(MVT::Other), - Ops, MVT::i16, MMO); + SDValue Chain = Op.getOperand(0); + SDValue Ops[] = {Chain, StackSlot}; + Chain = DAG.getMemIntrinsicNode( + X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), Ops, MVT::i16, MMO); // Load FP Control Word from stack slot SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo()); + Chain = CWD.getValue(1); // Mask and turn the control bits into a shift for the lookup table. SDValue Shift = @@ -25670,7 +25671,9 @@ DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift), DAG.getConstant(3, DL, MVT::i32)); - return DAG.getZExtOrTrunc(RetVal, DL, VT); + RetVal = DAG.getZExtOrTrunc(RetVal, DL, VT); + + return DAG.getMergeValues({RetVal, Chain}, DL); } // Split an unary integer op into 2 half sized ops. diff --git a/llvm/test/CodeGen/X86/flt-rounds.ll b/llvm/test/CodeGen/X86/flt-rounds.ll --- a/llvm/test/CodeGen/X86/flt-rounds.ll +++ b/llvm/test/CodeGen/X86/flt-rounds.ll @@ -49,25 +49,49 @@ ; X86-NEXT: shrl $9, %ecx ; X86-NEXT: andb $6, %cl ; X86-NEXT: movl $45, %esi +; X86-NEXT: movl $45, %eax ; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrl %cl, %esi -; X86-NEXT: andl $3, %esi +; X86-NEXT: shrl %cl, %eax +; X86-NEXT: andl $3, %eax ; X86-NEXT: xorl %ebx, %ebx -; X86-NEXT: cmpl $3, %esi +; X86-NEXT: cmpl $3, %eax ; X86-NEXT: setne %bl ; X86-NEXT: movl $0, (%esp) ; X86-NEXT: calll fesetround -; X86-NEXT: movl $3072, (%esp) # imm = 0xC00 -; X86-NEXT: calll fesetround -; X86-NEXT: cmpl $1, %esi -; X86-NEXT: leal 1(%ebx), %eax +; X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shrl $9, %ecx +; X86-NEXT: andb $6, %cl +; X86-NEXT: movl $45, %eax +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shrl %cl, %eax +; X86-NEXT: andl $3, %eax +; X86-NEXT: cmpl $1, %eax ; X86-NEXT: je .LBB1_2 ; X86-NEXT: # %bb.1: # %entry -; X86-NEXT: movl %eax, %ebx +; X86-NEXT: incl %ebx ; X86-NEXT: .LBB1_2: # %entry +; X86-NEXT: movl $3072, (%esp) # imm = 0xC00 +; X86-NEXT: calll fesetround +; X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shrl $9, %ecx +; X86-NEXT: andb $6, %cl +; X86-NEXT: movl $45, %eax +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shrl %cl, %eax +; X86-NEXT: andl $3, %eax +; X86-NEXT: cmpl $1, %eax ; X86-NEXT: sbbl $-1, %ebx ; X86-NEXT: movl $2048, (%esp) # imm = 0x800 ; X86-NEXT: calll fesetround +; X86-NEXT: fnstcw {{[0-9]+}}(%esp) +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: shrl $9, %ecx +; X86-NEXT: andb $6, %cl +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shrl %cl, %esi +; X86-NEXT: andl $3, %esi ; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: cmpl $2, %esi ; X86-NEXT: setne %cl @@ -92,25 +116,50 @@ ; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx ; X64-NEXT: shrl $9, %ecx ; X64-NEXT: andb $6, %cl -; X64-NEXT: movl $45, %ebx +; X64-NEXT: movl $45, %r14d +; X64-NEXT: movl $45, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrl %cl, %ebx -; X64-NEXT: andl $3, %ebx -; X64-NEXT: xorl %r14d, %r14d -; X64-NEXT: cmpl $3, %ebx -; X64-NEXT: setne %r14b +; X64-NEXT: shrl %cl, %eax +; X64-NEXT: andl $3, %eax +; X64-NEXT: xorl %ebx, %ebx +; X64-NEXT: cmpl $3, %eax +; X64-NEXT: setne %bl ; X64-NEXT: xorl %edi, %edi ; X64-NEXT: callq fesetround -; X64-NEXT: leal 1(%r14), %ebp +; X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; X64-NEXT: shrl $9, %ecx +; X64-NEXT: andb $6, %cl +; X64-NEXT: movl $45, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %eax +; X64-NEXT: andl $3, %eax +; X64-NEXT: leal 1(%rbx), %ebp +; X64-NEXT: cmpl $1, %eax +; X64-NEXT: cmovel %ebx, %ebp ; X64-NEXT: movl $3072, %edi # imm = 0xC00 ; X64-NEXT: callq fesetround -; X64-NEXT: cmpl $1, %ebx -; X64-NEXT: cmovel %r14d, %ebp +; X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; X64-NEXT: shrl $9, %ecx +; X64-NEXT: andb $6, %cl +; X64-NEXT: movl $45, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %eax +; X64-NEXT: andl $3, %eax +; X64-NEXT: cmpl $1, %eax ; X64-NEXT: sbbl $-1, %ebp ; X64-NEXT: movl $2048, %edi # imm = 0x800 ; X64-NEXT: callq fesetround +; X64-NEXT: fnstcw {{[0-9]+}}(%rsp) +; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx +; X64-NEXT: shrl $9, %ecx +; X64-NEXT: andb $6, %cl +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %r14d +; X64-NEXT: andl $3, %r14d ; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: cmpl $2, %ebx +; X64-NEXT: cmpl $2, %r14d ; X64-NEXT: setne %cl ; X64-NEXT: negl %ecx ; X64-NEXT: xorl %eax, %eax