Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -672,4 +672,40 @@
 def int_amdgcn_ds_bpermute :
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+
+//===----------------------------------------------------------------------===//
+// Special Intrinsics for backend internal use only. No frontend
+// should emit calls to these.
+//===----------------------------------------------------------------------===//
+def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
+  [llvm_i1_ty], [IntrConvergent]
+>;
+
+def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
+  [llvm_i64_ty], [IntrConvergent]
+>;
+
+def int_amdgcn_break : Intrinsic<[llvm_i64_ty],
+  [llvm_i64_ty], [IntrNoMem, IntrConvergent]
+>;
+
+def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty],
+  [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
+>;
+
+def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty],
+  [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
+>;
+
+def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
+  [llvm_i64_ty], [IntrConvergent]
+>;
+
+def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;
+
+// Emit 2.5 ulp, no denormal division. Should only be inserted by
+// pass based on !fpmath metadata.
+def int_amdgcn_fdiv_fast : Intrinsic<
+  [llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
+>;
 }
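For reference, the structured-return shape of the new control flow intrinsics is
easiest to see in IR. A minimal hand-written sketch (illustrative, not compiler
output; function and value names are made up) of what SIAnnotateControlFlow
produces for a divergent branch: the i1 result feeds the conditional branch and
the i64 result carries the saved exec mask to llvm.amdgcn.end.cf at the join.

define amdgpu_kernel void @divergent_if(i32 addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %cc = icmp eq i32 %tid, 0
  ; Annotated branch: element 0 drives the branch, element 1 is the
  ; exec mask saved for the join point.
  %if = call { i1, i64 } @llvm.amdgcn.if(i1 %cc)
  %cond = extractvalue { i1, i64 } %if, 0
  %mask = extractvalue { i1, i64 } %if, 1
  br i1 %cond, label %then, label %endif

then:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  ; Re-enable the lanes that were masked off at the if.
  call void @llvm.amdgcn.end.cf(i64 %mask)
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()
declare { i1, i64 } @llvm.amdgcn.if(i1)
declare void @llvm.amdgcn.end.cf(i64)

Defining these as proper intrinsics means the declarations above come from
Intrinsic::getDeclaration and get verifier type checking, instead of being
looked up by string name.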
Index: lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -14,7 +14,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
-#include "AMDGPUIntrinsicInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
 #include "llvm/ADT/StringRef.h"
@@ -389,9 +388,7 @@
   Builder.setFastMathFlags(FMF);
   Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
 
-  const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
-  Function *Decl
-    = II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});
+  Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);
 
   Value *Num = FDiv.getOperand(0);
   Value *Den = FDiv.getOperand(1);
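A hedged before/after sketch of the transform this pass performs (names are
made up; per the comment on the intrinsic, the pass keys on 2.5 ulp !fpmath
metadata):

define float @fdiv_25ulp(float %x, float %y) {
  %div = fdiv float %x, %y, !fpmath !0
  ret float %div
}

; After AMDGPUCodeGenPrepare, roughly:
define float @fdiv_25ulp_after(float %x, float %y) {
  %div = call float @llvm.amdgcn.fdiv.fast(float %x, float %y)
  ret float %div
}

declare float @llvm.amdgcn.fdiv.fast(float, float)

!0 = !{float 2.500000e+00}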
Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -235,6 +235,12 @@
   UMUL, // 32bit unsigned multiplication
   BRANCH_COND,
   // End AMDIL ISD Opcodes
+
+  // Masked control flow nodes.
+  IF,
+  ELSE,
+  LOOP,
+
   ENDPGM,
   RETURN,
   DWORDADDR,
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3408,6 +3408,9 @@
   NODE_NAME_CASE(BRANCH_COND);
 
   // AMDGPU DAG nodes
+  NODE_NAME_CASE(IF)
+  NODE_NAME_CASE(ELSE)
+  NODE_NAME_CASE(LOOP)
   NODE_NAME_CASE(ENDPGM)
   NODE_NAME_CASE(RETURN)
   NODE_NAME_CASE(DWORDADDR)
Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -46,10 +46,38 @@
 
 def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
+def AMDGPUIfOp : SDTypeProfile<1, 2,
+  [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
+>;
+
+def AMDGPUElseOp : SDTypeProfile<1, 2,
+  [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, OtherVT>]
+>;
+
+def AMDGPULoopOp : SDTypeProfile<0, 2,
+  [SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>]
+>;
+
+def AMDGPUBreakOp : SDTypeProfile<1, 1,
+  [SDTCisVT<0, i64>, SDTCisVT<1, i64>]
+>;
+
+def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
+  [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
+>;
+
+def AMDGPUElseBreakOp : SDTypeProfile<1, 2,
+  [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, i64>]
+>;
+
 //===----------------------------------------------------------------------===//
 // AMDGPU DAG Nodes
 //
 
+def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>;
+def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
+def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;
+
 def AMDGPUconstdata_ptr : SDNode<
   "AMDGPUISD::CONST_DATA_PTR",
   SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<0, iPTR>]>
Index: lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
+++ lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
@@ -55,10 +55,6 @@
                                           ArrayRef<Type *> Tys) const {
   // FIXME: Re-use Intrinsic::getType machinery
   switch (ID) {
-  case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
-    Type *F32Ty = Type::getFloatTy(Context);
-    return FunctionType::get(F32Ty, { F32Ty, F32Ty }, false);
-  }
   default:
     llvm_unreachable("unhandled intrinsic");
   }
Index: lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
===================================================================
--- lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -34,15 +34,6 @@
 typedef std::pair<BasicBlock *, Value *> StackEntry;
 typedef SmallVector<StackEntry, 16> StackVector;
 
-// Intrinsic names the control flow is annotated with
-static const char *const IfIntrinsic = "llvm.amdgcn.if";
-static const char *const ElseIntrinsic = "llvm.amdgcn.else";
-static const char *const BreakIntrinsic = "llvm.amdgcn.break";
-static const char *const IfBreakIntrinsic = "llvm.amdgcn.if.break";
-static const char *const ElseBreakIntrinsic = "llvm.amdgcn.else.break";
-static const char *const LoopIntrinsic = "llvm.amdgcn.loop";
-static const char *const EndCfIntrinsic = "llvm.amdgcn.end.cf";
-
 class SIAnnotateControlFlow : public FunctionPass {
   DivergenceAnalysis *DA;
 
@@ -56,13 +47,13 @@
   UndefValue *BoolUndef;
   Constant *Int64Zero;
 
-  Constant *If;
-  Constant *Else;
-  Constant *Break;
-  Constant *IfBreak;
-  Constant *ElseBreak;
-  Constant *Loop;
-  Constant *EndCf;
+  Function *If;
+  Function *Else;
+  Function *Break;
+  Function *IfBreak;
+  Function *ElseBreak;
+  Function *Loop;
+  Function *EndCf;
 
   DominatorTree *DT;
   StackVector Stack;
@@ -139,30 +130,13 @@
   BoolUndef = UndefValue::get(Boolean);
   Int64Zero = ConstantInt::get(Int64, 0);
 
-  If = M.getOrInsertFunction(
-      IfIntrinsic, ReturnStruct, Boolean, (Type *)nullptr);
-
-  Else = M.getOrInsertFunction(
-      ElseIntrinsic, ReturnStruct, Int64, (Type *)nullptr);
-
-  Break = M.getOrInsertFunction(
-      BreakIntrinsic, Int64, Int64, (Type *)nullptr);
-  cast<Function>(Break)->setDoesNotAccessMemory();
-
-  IfBreak = M.getOrInsertFunction(
-      IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)nullptr);
-  cast<Function>(IfBreak)->setDoesNotAccessMemory();;
-
-  ElseBreak = M.getOrInsertFunction(
-      ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)nullptr);
-  cast<Function>(ElseBreak)->setDoesNotAccessMemory();
-
-  Loop = M.getOrInsertFunction(
-      LoopIntrinsic, Boolean, Int64, (Type *)nullptr);
-
-  EndCf = M.getOrInsertFunction(
-      EndCfIntrinsic, Void, Int64, (Type *)nullptr);
-
+  If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if);
+  Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else);
+  Break = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_break);
+  IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break);
+  ElseBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else_break);
+  Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop);
+  EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf);
 
   return false;
 }
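And a matching sketch (again hand-written, names made up) of how the loop
intrinsics compose after annotation: llvm.amdgcn.if.break accumulates lanes
whose exit condition is true into the i64 mask phi, llvm.amdgcn.loop answers
whether the whole wave is done, and llvm.amdgcn.end.cf restores exec at the
exit block.

define amdgpu_kernel void @divergent_loop(i32 addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  br label %loop

loop:
  %broken = phi i64 [ 0, %entry ], [ %mask, %loop ]
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %iv.next = add i32 %iv, 1
  %done = icmp sge i32 %iv.next, %tid
  ; Fold the lanes that want to exit this iteration into the mask.
  %mask = call i64 @llvm.amdgcn.if.break(i1 %done, i64 %broken)
  ; Take the exit edge only once every active lane has broken out.
  %exit = call i1 @llvm.amdgcn.loop(i64 %mask)
  br i1 %exit, label %end, label %loop

end:
  call void @llvm.amdgcn.end.cf(i64 %mask)
  store i32 %iv, i32 addrspace(1)* %out
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()
declare i64 @llvm.amdgcn.if.break(i1, i64)
declare i1 @llvm.amdgcn.loop(i64)
declare void @llvm.amdgcn.end.cf(i64)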
Index: lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.h
+++ lib/Target/AMDGPU/SIISelLowering.h
@@ -102,7 +102,7 @@
   bool isLegalFlatAddressingMode(const AddrMode &AM) const;
   bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
 
-  bool isCFIntrinsic(const SDNode *Intr) const;
+  unsigned isCFIntrinsic(const SDNode *Intr) const;
 
   void createDebuggerPrologueStackObjects(MachineFunction &MF) const;
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2139,31 +2139,25 @@
   return nullptr;
 }
 
-bool SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
+unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
   if (Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
     switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) {
-    case AMDGPUIntrinsic::amdgcn_if:
-    case AMDGPUIntrinsic::amdgcn_else:
-    case AMDGPUIntrinsic::amdgcn_end_cf:
-    case AMDGPUIntrinsic::amdgcn_loop:
-      return true;
-    default:
-      return false;
-    }
-  }
-
-  if (Intr->getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
-    switch (cast<ConstantSDNode>(Intr->getOperand(0))->getZExtValue()) {
-    case AMDGPUIntrinsic::amdgcn_break:
-    case AMDGPUIntrinsic::amdgcn_if_break:
-    case AMDGPUIntrinsic::amdgcn_else_break:
-      return true;
+    case Intrinsic::amdgcn_if:
+      return AMDGPUISD::IF;
+    case Intrinsic::amdgcn_else:
+      return AMDGPUISD::ELSE;
+    case Intrinsic::amdgcn_loop:
+      return AMDGPUISD::LOOP;
+    case Intrinsic::amdgcn_end_cf:
+      llvm_unreachable("should not occur");
     default:
-      return false;
+      return 0;
     }
   }
 
-  return false;
+  // break, if_break, else_break are all only used as inputs to loop, not
+  // directly as branch conditions.
+  return 0;
 }
 
 void SITargetLowering::createDebuggerPrologueStackObjects(
@@ -2238,7 +2232,8 @@
   // eg: i1,ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3
   // =>     t9: ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3, BasicBlock:ch
 
-  if (!isCFIntrinsic(Intr)) {
+  unsigned CFNode = isCFIntrinsic(Intr);
+  if (CFNode == 0) {
     // This is a uniform branch so we don't need to legalize.
     return BRCOND;
   }
@@ -2256,15 +2251,13 @@
   if (HaveChain)
     Ops.push_back(BRCOND.getOperand(0));
 
-  Ops.append(Intr->op_begin() + (HaveChain ? 1 : 0), Intr->op_end());
+  Ops.append(Intr->op_begin() + (HaveChain ? 2 : 1), Intr->op_end());
   Ops.push_back(Target);
 
   ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
 
   // build the new intrinsic call
-  SDNode *Result = DAG.getNode(
-    Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
-    DAG.getVTList(Res), Ops).getNode();
+  SDNode *Result = DAG.getNode(CFNode, DL, DAG.getVTList(Res), Ops).getNode();
 
   if (!HaveChain) {
     SDValue Ops[] = {
@@ -2788,7 +2781,7 @@
     return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
                                    Op->getVTList(), Ops, VT, MMO);
   }
-  case AMDGPUIntrinsic::amdgcn_fdiv_fast:
+  case Intrinsic::amdgcn_fdiv_fast:
     return lowerFDIV_FAST(Op, DAG);
   case AMDGPUIntrinsic::SI_vs_load_input:
     return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -173,34 +173,29 @@
 def SI_IF: CFPseudoInstSI <
   (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
-  [(set i64:$dst, (int_amdgcn_if i1:$vcc, bb:$target))], 1, 1> {
+  [(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
   let Constraints = "";
   let Size = 12;
-  let mayLoad = 1;
-  let mayStore = 1;
   let hasSideEffects = 1;
 }
 
 def SI_ELSE : CFPseudoInstSI <
-  (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
+  (outs SReg_64:$dst),
+  (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
   let Constraints = "$src = $dst";
   let Size = 12;
-  let mayStore = 1;
-  let mayLoad = 1;
   let hasSideEffects = 1;
 }
 
 def SI_LOOP : CFPseudoInstSI <
   (outs), (ins SReg_64:$saved, brtarget:$target),
-  [(int_amdgcn_loop i64:$saved, bb:$target)], 1, 1> {
+  [(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
   let Size = 8;
-  let isBranch = 1;
+  let isBranch = 0;
   let hasSideEffects = 1;
-  let mayLoad = 1;
-  let mayStore = 1;
 }
 
-} // End isBranch = 1, isTerminator = 1
+} // End isTerminator = 1
 
 def SI_END_CF : CFPseudoInstSI <
   (outs), (ins SReg_64:$saved),
@@ -208,9 +203,9 @@
   let Size = 4;
   let isAsCheapAsAMove = 1;
   let isReMaterializable = 1;
-  let mayLoad = 1;
-  let mayStore = 1;
   let hasSideEffects = 1;
+  let mayLoad = 1; // FIXME: Should not need memory flags
+  let mayStore = 1;
 }
 
 def SI_BREAK : CFPseudoInstSI <
@@ -400,7 +395,7 @@
 >;
 
 def : Pat<
-  (int_amdgcn_else i64:$src, bb:$target),
+  (AMDGPUelse i64:$src, bb:$target),
   (SI_ELSE $src, $target, 0)
 >;
Index: lib/Target/AMDGPU/SIIntrinsics.td
===================================================================
--- lib/Target/AMDGPU/SIIntrinsics.td
+++ lib/Target/AMDGPU/SIIntrinsics.td
@@ -182,21 +182,3 @@
   def int_SI_image_load_mip : Image;
   def int_SI_getresinfo : Image;
 } // End TargetPrefix = "SI", isTarget = 1
-
-let TargetPrefix = "amdgcn", isTarget = 1 in {
-  // Emit 2.5 ulp, no denormal division. Should only be inserted by
-  // pass based on !fpmath metadata.
-  def int_amdgcn_fdiv_fast : Intrinsic<
-    [llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
-  >;
-
-  /* Control flow Intrinsics */
-
-  def int_amdgcn_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], [IntrConvergent]>;
-  def int_amdgcn_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], [IntrConvergent]>;
-  def int_amdgcn_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
-  def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
-  def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
-  def int_amdgcn_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], [IntrConvergent]>;
-  def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;
-}
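One detail worth noting about the deleted SIIntrinsics.td block: those entries
never matched the declarations SIAnnotateControlFlow actually created by name
with getOrInsertFunction. An illustrative comparison for llvm.amdgcn.if:

; What the pass really created and used:
declare { i1, i64 } @llvm.amdgcn.if(i1)
; What the removed TableGen entry described (i64 result, plus an
; llvm_empty_ty placeholder standing in for the branch target):
;   declare i64 @llvm.amdgcn.if(i1, <empty>)

With the definitions moved to IntrinsicsAMDGPU.td there is a single source of
truth, and the branch target is attached later, when the BRCOND lowering in
SIISelLowering.cpp rewrites the call into an AMDGPUISD::IF/ELSE/LOOP node.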