Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -48,6 +48,7 @@
   const TargetMachine *TM = nullptr;
 
   bool addFeatureAttributes(Function &F);
+  bool propagateAttribute(Function &F);
 
 public:
   static char ID;
@@ -213,6 +214,62 @@
   handleAttr(Parent, Callee, AttrName);
 }
 
+/// Propagate the "uniform-work-group-size" attribute of \p F to every function
+/// it calls directly, and onwards to the callees of each function it updates.
+///
+/// A value of "false" overrides whatever the callee had; any other value is
+/// only given, as "true", to callees that do not carry the attribute yet. The
+/// walk descends only into a callee whose attribute actually changed, so it
+/// terminates on recursive call graphs: a callee's attribute can only go from
+/// absent to "true" to "false".
+///
+/// \returns true if the attribute of any direct callee was added or changed.
+bool AMDGPUAnnotateKernelFeatures::propagateAttribute(Function &F) {
+  static const char AttrName[] = "uniform-work-group-size";
+
+  if (!F.hasFnAttribute(AttrName))
+    return false;
+
+  // Every update made below writes this same value, so F's own attribute
+  // cannot change while its call sites are being walked.
+  const bool CallerIsFalse =
+      F.getFnAttribute(AttrName).getValueAsString().equals("false");
+
+  bool Changed = false;
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      CallSite CS(&I);
+      if (!CS)
+        continue;
+
+      // Only direct calls have a known callee to annotate.
+      Function *Callee = CS.getCalledFunction();
+      if (!Callee)
+        continue;
+
+      if (CallerIsFalse) {
+        // "false" wins over any value the callee already has.
+        if (Callee->hasFnAttribute(AttrName) &&
+            Callee->getFnAttribute(AttrName).getValueAsString().equals("false"))
+          continue;
+        Callee->addFnAttr(AttrName, "false");
+      } else {
+        // Any other value is treated as "true" and never overrides an
+        // existing setting.
+        if (Callee->hasFnAttribute(AttrName))
+          continue;
+        Callee->addFnAttr(AttrName, "true");
+      }
+
+      // The callee changed, so its own callees have to be revisited.
+      Changed = true;
+      propagateAttribute(*Callee);
+    }
+  }
+
+  return Changed;
+}
+
 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
   const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
   bool HasFlat = ST.hasFlatAddressSpace();
@@ -293,16 +350,14 @@
 }
 
 bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
-  Module &M = SCC.getCallGraph().getModule();
-  Triple TT(M.getTargetTriple());
-
   bool Changed = false;
   for (CallGraphNode *I : SCC) {
     Function *F = I->getFunction();
     if (!F || F->isDeclaration())
       continue;
 
     Changed |= addFeatureAttributes(*F);
+    Changed |= propagateAttribute(*F);
   }
 
   return Changed;
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -667,6 +667,10 @@
 }
 
 void AMDGPUPassConfig::addCodeGenPrepare() {
+
+  if (TM->getTargetTriple().getArch() == Triple::amdgcn)
+    addPass(createAMDGPUAnnotateKernelFeaturesPass());
+
   if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
       EnableLowerKernelArguments)
     addPass(createAMDGPULowerKernelArgumentsPass());
@@ -749,7 +753,6 @@
 
   // FIXME: We need to run a pass to propagate the attributes when calls are
   // supported.
-  addPass(createAMDGPUAnnotateKernelFeaturesPass());
 
   // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
   // regions formed by them.
Index: test/CodeGen/AMDGPU/uniform-work-group-test1.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/uniform-work-group-test1.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
+
+; Test 1: a kernel marked "uniform-work-group-size"="false" calls @foo, which has no such attribute; @foo is expected to end up with "false".
+; GCN: define void @foo() #[[FOO:[0-9]+]] {
+define void @foo() #0 {
+  ret void
+}
+
+; GCN: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
+define amdgpu_kernel void @kernel1() #1 {
+  call void @foo()
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { "uniform-work-group-size"="false" }
+
+; GCN: attributes #[[FOO]] = { nounwind "uniform-work-group-size"="false" }
+; GCN: attributes #[[KERNEL1]] = { "uniform-work-group-size"="false" }
Index: test/CodeGen/AMDGPU/uniform-work-group-test2.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/uniform-work-group-test2.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
+
+; Test 2: same shape as Test 1, but the callee @boo starts with an attribute group that holds only an empty string attribute; it is expected to gain "false".
+; GCN: define void @boo() #[[BOO:[0-9]+]] {
+define void @boo() #2 {
+  ret void
+}
+
+; GCN: define amdgpu_kernel void @kernel2() #[[KERNEL2:[0-9]+]] {
+define amdgpu_kernel void @kernel2() #3 {
+  call void @boo()
+  ret void
+}
+
+attributes #2 = { "" }
+attributes #3 = { "uniform-work-group-size"="false" }
+
+; GCN: attributes #[[BOO]] = { "" "uniform-work-group-size"="false" }
+; GCN: attributes #[[KERNEL2]] = { "uniform-work-group-size"="false" }
Index: test/CodeGen/AMDGPU/uniform-work-group-test3.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/uniform-work-group-test3.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
+
+; Test 3: "uniform-work-group-size"="true" on the kernel is expected on both functions of the nested call chain @kernel3 -> @boo -> @foo.
+; GCN: define void @foo() #[[FOO:[0-9]+]] {
+define void @foo() #0 {
+  ret void
+}
+
+; GCN: define void @boo() #[[BOO:[0-9]+]] {
+define void @boo() #1 {
+  call void @foo()
+  ret void
+}
+
+; GCN: define amdgpu_kernel void @kernel3() #[[KERNEL3:[0-9]+]] {
+define amdgpu_kernel void @kernel3() #2 {
+  call void @boo()
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { "" }
+attributes #2 = { "uniform-work-group-size"="true" }
+
+; GCN: attributes #[[FOO]] = { nounwind "uniform-work-group-size"="true" }
+; GCN: attributes #[[BOO]] = { "" "uniform-work-group-size"="true" }
+; GCN: attributes #[[KERNEL3]] = { "uniform-work-group-size"="true" }
Index: test/CodeGen/AMDGPU/uniform-work-group-test4.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/uniform-work-group-test4.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
+
+; Test 4: "false" on the kernel is expected along @kernel4 -> @boo -> @foo, replacing the "true" that @foo starts with.
+; GCN: define void @foo() #[[FOO:[0-9]+]] {
+define void @foo() #0 {
+  ret void
+}
+
+; GCN: define void @boo() #[[BOO:[0-9]+]] {
+define void @boo() #1 {
+  call void @foo()
+  ret void
+}
+
+; GCN: define amdgpu_kernel void @kernel4() #[[KERNEL4:[0-9]+]] {
+define amdgpu_kernel void @kernel4() #2 {
+  call void @boo()
+  ret void
+}
+
+attributes #0 = { nounwind "uniform-work-group-size"="true"}
+attributes #1 = { "" }
+attributes #2 = { "uniform-work-group-size"="false" }
+
+; GCN: attributes #[[FOO]] = { nounwind "uniform-work-group-size"="false" }
+; GCN: attributes #[[BOO]] = { "" "uniform-work-group-size"="false" }
+; GCN: attributes #[[KERNEL4]] = { "uniform-work-group-size"="false" }
Index: test/CodeGen/AMDGPU/uniform-work-group-test5.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/uniform-work-group-test5.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
+
+; Test 5: @foo is called by one kernel marked "true" and one marked "false"; @foo is expected to end up with "false" while each kernel keeps its own value.
+; GCN: define void @foo() #[[FOO:[0-9]+]] {
+define void @foo() #0 {
+  ret void
+}
+
+; GCN: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
+define amdgpu_kernel void @kernel1() #1 {
+  call void @foo()
+  ret void
+}
+
+; GCN: define amdgpu_kernel void @kernel2() #[[KERNEL2:[0-9]+]] {
+define amdgpu_kernel void @kernel2() #2 {
+  call void @foo()
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { "uniform-work-group-size"="true" }
+attributes #2 = { "uniform-work-group-size"="false" }
+
+; GCN: attributes #[[FOO]] = { nounwind "uniform-work-group-size"="false" }
+; GCN: attributes #[[KERNEL1]] = { "uniform-work-group-size"="true" }
+; GCN: attributes #[[KERNEL2]] = { "uniform-work-group-size"="false" }