Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -146,4 +146,12 @@
             [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
             [IntrNoMem]>;  // See int_amdgcn_v_interp_p1 for why this is
                            // IntrNoMem.
+
+def int_amdgcn_v_mbcnt_lo_u32_b32 :
+  GCCBuiltin<"__builtin_amdgcn_v_mbcnt_lo_u32_b32">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_amdgcn_v_mbcnt_hi_u32_b32 :
+  GCCBuiltin<"__builtin_amdgcn_v_mbcnt_hi_u32_b32">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
 }
Index: lib/Target/AMDGPU/AMDGPUIntrinsicExpander.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUIntrinsicExpander.cpp
+++ lib/Target/AMDGPU/AMDGPUIntrinsicExpander.cpp
@@ -29,6 +29,7 @@
   Module *Mod;

   void expandSIFsInterp(IntrinsicInst &I);
+  void expandSITid(IntrinsicInst &I);

 public:
   static char ID;
@@ -90,6 +91,31 @@
   I.eraseFromParent();
 }

+/// Expand a call to llvm.SI.tid into the equivalent pair of mbcnt intrinsic
+/// calls: v_mbcnt_hi_u32_b32(~0, v_mbcnt_lo_u32_b32(~0, 0)). The original call
+/// is replaced by the result and erased.
+void AMDGPUIntrinsicExpander::expandSITid(IntrinsicInst &I) {
+  IRBuilder<> Builder(&I);
+
+  Function *MbcntLo =
+      Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_v_mbcnt_lo_u32_b32);
+  Function *MbcntHi =
+      Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_v_mbcnt_hi_u32_b32);
+
+  IntegerType *I32 = IntegerType::get(I.getContext(), 32);
+  Constant *Mask = ConstantInt::get(I32, APInt::getAllOnesValue(32));
+  Constant *Zero = ConstantInt::get(I32, 0);
+
+  Value *LoArgs[] = {Mask, Zero};
+  Value *Lo = Builder.CreateCall(MbcntLo, LoArgs);
+
+  Value *HiArgs[] = {Mask, Lo};
+  Value *Hi = Builder.CreateCall(MbcntHi, HiArgs);
+
+  I.replaceAllUsesWith(Hi);
+  I.eraseFromParent();
+}
+
 void AMDGPUIntrinsicExpander::visitCallInst(CallInst &I) {
   IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(&I);
   if (!Intr)
@@ -97,6 +123,8 @@

   if (Intr->getCalledFunction()->getName() == "llvm.SI.fs.interp")
     expandSIFsInterp(*Intr);
+  else if (Intr->getCalledFunction()->getName() == "llvm.SI.tid")
+    expandSITid(*Intr);
 }

 bool AMDGPUIntrinsicExpander::runOnFunction(Function &F) {
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1590,10 +1590,10 @@
   VOP_I32_I32_I32
 >;
 defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst , "v_mbcnt_lo_u32_b32",
-  VOP_I32_I32_I32
+  VOP_I32_I32_I32, int_amdgcn_v_mbcnt_lo_u32_b32
 >;
 defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst , "v_mbcnt_hi_u32_b32",
-  VOP_I32_I32_I32
+  VOP_I32_I32_I32, int_amdgcn_v_mbcnt_hi_u32_b32
 >;
 defm V_LDEXP_F32 : VOP2_VI3_Inst , "v_ldexp_f32",
   VOP_F32_F32_I32, AMDGPUldexp
@@ -2752,12 +2752,6 @@
                         (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
 >;

-def : Pat <
-  (int_SI_tid),
-  (V_MBCNT_HI_U32_B32_e64 0xffffffff,
-                          (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0))
->;
-
 //===----------------------------------------------------------------------===//
 // VOP3 Patterns
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/SIIntrinsics.td
===================================================================
--- lib/Target/AMDGPU/SIIntrinsics.td
+++ lib/Target/AMDGPU/SIIntrinsics.td
@@ -14,7 +14,6 @@

 let TargetPrefix = "SI", isTarget = 1 in {

-  def int_SI_tid : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
   def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
   def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
   def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
Index: test/CodeGen/AMDGPU/llvm.SI.tid.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.SI.tid.ll
+++ test/CodeGen/AMDGPU/llvm.SI.tid.ll
@@ -13,6 +13,24 @@
   ret void
 }

+;GCN-LABEL: {{^}}mbcnt_intrinsics:
+;GCN: v_mbcnt_lo_u32_b32_e64 [[LO:v[0-9]+]], -1, 0
+;SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]]
+;VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]]
+
+define void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
+main_body:
+  %lo = call i32 @llvm.amdgcn.v.mbcnt.lo.u32.b32(i32 -1, i32 0)
+  %hi = call i32 @llvm.amdgcn.v.mbcnt.hi.u32.b32(i32 -1, i32 %lo)
+  %4 = bitcast i32 %hi to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %4, float %4, float %4, float %4)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.v.mbcnt.lo.u32.b32(i32, i32) readnone
+
+declare i32 @llvm.amdgcn.v.mbcnt.hi.u32.b32(i32, i32) readnone
+
 declare i32 @llvm.SI.tid() readnone

 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)