Index: lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -163,6 +163,7 @@
 
   bool visitInstruction(Instruction &I) { return false; }
   bool visitBinaryOperator(BinaryOperator &I);
+  bool visitCallInst(CallInst &I);
   bool visitLoadInst(LoadInst &I);
   bool visitICmpInst(ICmpInst &I);
   bool visitSelectInst(SelectInst &I);
@@ -795,6 +796,42 @@
   return Changed;
 }
 
+/// Fold calls to `__is_work_group_size_uniform` into an i1 constant when
+/// compiling for the amdhsa OS.
+///
+/// The call folds to true only if the enclosing function has the string
+/// attribute "uniform-work-group-size" with value "true"; a missing attribute
+/// or any other value folds to false. Indirect calls, calls to other
+/// functions, and calls on other OSes are left untouched.
+///
+/// \returns true if the call was replaced and erased, false otherwise.
+bool AMDGPUCodeGenPrepare::visitCallInst(CallInst &I) {
+  if (!ST->isAmdHsaOS())
+    return false;
+
+  const Function *Callee = I.getCalledFunction();
+  if (!Callee || Callee->getName() != "__is_work_group_size_uniform")
+    return false;
+
+  // The hook is matched by name only, so make sure the call really produces
+  // an i1 before substituting an i1 constant; replaceAllUsesWith requires the
+  // replacement to have the same type as the value it replaces.
+  if (!I.getType()->isIntegerTy(1))
+    return false;
+
+  // A missing attribute yields an empty value string, which folds to false.
+  const bool IsUniformWorkGroupSize =
+      I.getFunction()
+          ->getFnAttribute("uniform-work-group-size")
+          .getValueAsString() == "true";
+
+  LLVMContext &Ctx = I.getContext();
+  I.replaceAllUsesWith(IsUniformWorkGroupSize ? ConstantInt::getTrue(Ctx)
+                                              : ConstantInt::getFalse(Ctx));
+  I.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
   if (!WidenLoads)
     return false;
Index: test/CodeGen/AMDGPU/amdgpu-codegenprepare-uniform-work-group-size.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/amdgpu-codegenprepare-uniform-work-group-size.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=NONAMDHSA %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-codegenprepare %s | FileCheck -check-prefix=GCN -check-prefix=AMDHSA %s
+
+; Calls to @__is_work_group_size_uniform are folded to a constant only for the
+; amdhsa OS; every other target must keep the call.
+
+; "uniform-work-group-size"="true" folds the call to true.
+; GCN-LABEL: @test0(
+; NONAMDHSA: %[[CR:[0-9]+]] = call i1 @__is_work_group_size_uniform()
+; NONAMDHSA: select i1 %[[CR]], i32 17, i32 27
+
+; AMDHSA-NOT: call i1 @__is_work_group_size_uniform()
+; AMDHSA: select i1 true, i32 17, i32 27
+define amdgpu_kernel void @test0(i32 addrspace(1)* %out) #0 {
+entry:
+  %0 = call i1 @__is_work_group_size_uniform()
+  %1 = select i1 %0, i32 17, i32 27
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; "uniform-work-group-size"="false" folds the call to false.
+; GCN-LABEL: @test1(
+; NONAMDHSA: %[[CR:[0-9]+]] = call i1 @__is_work_group_size_uniform()
+; NONAMDHSA: select i1 %[[CR]], i32 17, i32 27
+
+; AMDHSA-NOT: call i1 @__is_work_group_size_uniform()
+; AMDHSA: select i1 false, i32 17, i32 27
+define amdgpu_kernel void @test1(i32 addrspace(1)* %out) #1 {
+entry:
+  %0 = call i1 @__is_work_group_size_uniform()
+  %1 = select i1 %0, i32 17, i32 27
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; Without the attribute the call also folds to false.
+; GCN-LABEL: @test2(
+; NONAMDHSA: %[[CR:[0-9]+]] = call i1 @__is_work_group_size_uniform()
+; NONAMDHSA: select i1 %[[CR]], i32 17, i32 27
+
+; AMDHSA-NOT: call i1 @__is_work_group_size_uniform()
+; AMDHSA: select i1 false, i32 17, i32 27
+define amdgpu_kernel void @test2(i32 addrspace(1)* %out) {
+entry:
+  %0 = call i1 @__is_work_group_size_uniform()
+  %1 = select i1 %0, i32 17, i32 27
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+
+declare i1 @__is_work_group_size_uniform()
+
+attributes #0 = { "uniform-work-group-size"="true"}
+attributes #1 = { "uniform-work-group-size"="false"}
+