Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -48,6 +48,13 @@
   const TargetMachine *TM = nullptr;

   bool addFeatureAttributes(Function &F);
+  bool processUniformWorkGroupAttribute();
+  bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
+
+  // Call-graph nodes accumulated across runOnSCC invocations in bottom-up
+  // order (callees before callers).  Flushed and cleared when the graph
+  // root (a node with no references) is reached, so no state leaks across
+  // modules or pass runs.
+  SmallVector<CallGraphNode *, 8> NodeList;

 public:
   static char ID;
@@ -213,6 +220,62 @@
     handleAttr(Parent, Callee, AttrName);
 }

+// Walk the accumulated nodes caller-first and propagate the
+// "uniform-work-group-size" attribute along every call edge.  NodeList was
+// filled in SCC (bottom-up) order, so the reverse walk visits callers before
+// their callees and the attribute flows from kernels down to leaf functions.
+//
+// Returns true if any function attribute was changed.
+bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
+  bool Changed = false;
+
+  for (CallGraphNode *Node : llvm::reverse(NodeList)) {
+    Function *Caller = Node->getFunction();
+    // Artificial nodes (external-calling / calls-external) have no function.
+    if (!Caller)
+      continue;
+
+    for (const auto &Edge : *Node) {
+      // Edges to the artificial "calls external" node also yield a null
+      // function; skip them instead of dereferencing a missing entry.
+      if (Function *Callee = Edge.second->getFunction())
+        Changed |= propagateUniformWorkGroupAttribute(*Caller, *Callee);
+    }
+  }
+
+  return Changed;
+}
+
+// Propagate the attribute along a single caller -> callee edge.  Returns true
+// if either function was modified.
+bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
+    Function &Caller, Function &Callee) {
+  if (!Caller.hasFnAttribute("uniform-work-group-size")) {
+    // A caller without the attribute cannot guarantee a uniform work-group
+    // size, so conservatively pin both ends of the edge to "false".
+    Caller.addFnAttr("uniform-work-group-size", "false");
+    Callee.addFnAttr("uniform-work-group-size", "false");
+    return true;
+  }
+
+  if (Caller.getFnAttribute("uniform-work-group-size")
+          .getValueAsString() == "true") {
+    // Seed the callee only if it does not already carry a value of its own.
+    if (!Callee.hasFnAttribute("uniform-work-group-size")) {
+      Callee.addFnAttr("uniform-work-group-size", "true");
+      return true;
+    }
+    return false;
+  }
+
+  // Caller is explicitly "false": the callee may be reached with non-uniform
+  // work-group sizes, so force it to "false" as well.
+  Callee.addFnAttr("uniform-work-group-size", "false");
+  return true;
+}
+
 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
   const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
   bool HasFlat = ST.hasFlatAddressSpace();
@@ -293,18 +356,22 @@
 }

 bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
-  Module &M = SCC.getCallGraph().getModule();
-  Triple TT(M.getTargetTriple());
-
   bool Changed = false;
+
   for (CallGraphNode *I : SCC) {
-    Function *F = I->getFunction();
+    // Defer attribute propagation until a node with no references is seen
+    // (the call graph's root); by then every caller has been recorded.
+    if (I->getNumReferences())
+      NodeList.push_back(I);
+    else {
+      Changed |= processUniformWorkGroupAttribute();
+      NodeList.clear();
+    }
+
+    Function *F = I->getFunction();
+    // Add feature attributes.
     if (!F || F->isDeclaration())
       continue;
-
     Changed |= addFeatureAttributes(*F);
   }
-
   return Changed;
 }
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
---
lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -680,6 +680,10 @@
 }

 void AMDGPUPassConfig::addCodeGenPrepare() {
+  // Annotate kernel features (including the propagated
+  // uniform-work-group-size attribute) before kernel-argument lowering.
+  if (TM->getTargetTriple().getArch() == Triple::amdgcn)
+    addPass(createAMDGPUAnnotateKernelFeaturesPass());
+
   if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
       EnableLowerKernelArguments)
     addPass(createAMDGPULowerKernelArgumentsPass());
@@ -767,7 +771,6 @@

   // FIXME: We need to run a pass to propagate the attributes when calls are
   // supported.
-  addPass(createAMDGPUAnnotateKernelFeaturesPass());

   // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
   // regions formed by them.
Index: test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
===================================================================
--- test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -244,52 +244,52 @@
   ret void
 }

-; HSA: define void @use_implicitarg_ptr() #15 {
+; HSA: define void @use_implicitarg_ptr() #16 {
 define void @use_implicitarg_ptr() #1 {
   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
   store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
   ret void
 }

-; HSA: define void @func_indirect_use_implicitarg_ptr() #15 {
+; HSA: define void @func_indirect_use_implicitarg_ptr() #16 {
 define void @func_indirect_use_implicitarg_ptr() #1 {
   call void @use_implicitarg_ptr()
   ret void
 }

-; HSA: declare void @external.func() #16
+; HSA: declare void @external.func() #17
 declare void @external.func() #3

-; HSA: define internal void @defined.func() #16 {
+; HSA: define internal void @defined.func() #17 {
 define internal void @defined.func() #3 {
   ret void
 }

-; HSA: define void @func_call_external() #16 {
+; HSA: define void @func_call_external() #17 {
 define void @func_call_external() #3 {
   call void @external.func()
   ret void
 }

-; HSA: define void @func_call_defined() #16 {
+; HSA: define void @func_call_defined() #17 {
 define void @func_call_defined() #3 {
   call void @defined.func()
   ret void
 }

-; HSA: define void @func_call_asm() #16 {
+; HSA: define void @func_call_asm() #17 {
 define void @func_call_asm() #3 {
   call void asm sideeffect "", ""() #3
   ret void
 }

-; HSA: define amdgpu_kernel void @kern_call_external() #17 {
+; HSA: define amdgpu_kernel void @kern_call_external() #18 {
 define amdgpu_kernel void @kern_call_external() #3 {
   call void @external.func()
   ret void
 }

-; HSA: define amdgpu_kernel void @func_kern_defined() #17 {
+; HSA: define amdgpu_kernel void @func_kern_defined() #18 {
 define amdgpu_kernel void @func_kern_defined() #3 {
   call void @defined.func()
   ret void
@@ -301,20 +301,21 @@
 attributes #3 = { nounwind }

 ; HSA: attributes #0 = { nounwind readnone speculatable }
-; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" }
-; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" }
-; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" }
-; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" }
-; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" }
-; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" }
-; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" }
-; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" }
-; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" }
+; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #5 = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #6 = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" }
 ; HSA: attributes #10 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" }
-; HSA: attributes #11 = { nounwind "target-cpu"="fiji" }
-; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" }
-; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" }
-; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" }
+; HSA: attributes #11 = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
 ; HSA: attributes #15 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" }
-; HSA: attributes #16 = { nounwind }
-; HSA: attributes #17 = { nounwind "amdgpu-flat-scratch" }
+; HSA: attributes #16 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; HSA: attributes #17 = { nounwind "uniform-work-group-size"="false" }
+; HSA: attributes #18 = { nounwind "amdgpu-flat-scratch" "uniform-work-group-size"="false" }
Index: test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
@@ -0,0 +1,20 @@
+; RUN: opt -S
-mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
+
+; Test 1
+; "If the kernel does not have the uniform-work-group-size attribute, set both caller and callee to false"
+; CHECK: define void @foo() #[[FOO:[0-9]+]] {
+define void @foo() #0 {
+  ret void
+}
+
+; kernel1 deliberately carries no attribute group: referencing an undefined
+; group would be rejected by the IR parser.
+; CHECK: define amdgpu_kernel void @kernel1() #[[FOO]] {
+define amdgpu_kernel void @kernel1() {
+  call void @foo()
+  ret void
+}
+
+attributes #0 = { "uniform-work-group-size"="true" }
+
+; CHECK: attributes #[[FOO]] = { "uniform-work-group-size"="false" }
Index: test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
+
+; Test 3
+; "Verify that the attribute is propagated across nested function calls"
+; CHECK: define void @func1() #[[FUNC:[0-9]+]] {
+define void @func1() {
+  ret void
+}
+
+; CHECK: define void @func2() #[[FUNC]] {
+define void @func2() {
+  call void @func1()
+  ret void
+}
+
+; All three functions end up in the same attribute group, so reuse [[FUNC]]
+; instead of re-capturing it.
+; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC]] {
+define amdgpu_kernel void @kernel3() #2 {
+  call void @func2()
+  ret void
+}
+
+attributes #2 = { "uniform-work-group-size"="true" }
+
+; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" }
Index: test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
@@ -0,0 +1,26 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
+
+; Test 5
+; "Two kernels with different values of the uniform-work-group-size attribute call the same function"
+; CHECK: define void @func() #[[FUNC:[0-9]+]] {
+define void @func() {
+  ret void
+}
+
+; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
+define amdgpu_kernel void @kernel1() #1 {
+  call void @func()
+  ret void
+}
+
+; kernel2 has no attribute, so it and @func must both end up "false".
+; CHECK: define amdgpu_kernel void @kernel2() #[[FUNC]] {
+define amdgpu_kernel void @kernel2() {
+  call void @func()
+  ret void
+}
+
+attributes #1 = { "uniform-work-group-size"="true" }
+
+; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="false" }
+; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="true" }
Index: test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
@@ -0,0 +1,20 @@
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
+
+; Test 2
+; "Propagate the uniform-work-group-size attribute from the kernel to a callee that lacks it"
+; CHECK: define void @func() #[[FUNC:[0-9]+]] {
+define void @func() #0 {
+  ret void
+}
+
+; CHECK: define amdgpu_kernel void @kernel() #[[KERNEL:[0-9]+]] {
+define amdgpu_kernel void @kernel() #1 {
+  call void @func()
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { "uniform-work-group-size"="false" }
+
+; CHECK: attributes #[[FUNC]] = { nounwind "uniform-work-group-size"="false" }
+; CHECK: attributes #[[KERNEL]] = { "uniform-work-group-size"="false" }