Index: lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -235,6 +235,24 @@
           if (!CS.isInlineAsm())
             HaveCall = true;
           continue;
         }
 
+        // Propagate the kernel's "uniform-work-group-size" attribute to the
+        // callee. A callee that is absent the attribute, or already "true",
+        // takes the kernel's value; a callee explicitly marked "false" keeps
+        // it. Do not `continue` in the "false" case: the intrinsic handling
+        // below must still run for this call site.
+        if (ST.isAmdHsaOS() &&
+            F.getCallingConv() == CallingConv::AMDGPU_KERNEL &&
+            F.hasFnAttribute("uniform-work-group-size")) {
+          bool Propagate = true;
+          if (Callee->hasFnAttribute("uniform-work-group-size")) {
+            Attribute CalleeAttr =
+                Callee->getFnAttribute("uniform-work-group-size");
+            Propagate = CalleeAttr.getValueAsString().equals("true");
+          }
+          if (Propagate)
+            Callee->addFnAttr(F.getFnAttribute("uniform-work-group-size"));
+        }
+
         Intrinsic::ID IID = Callee->getIntrinsicID();
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -667,6 +667,11 @@
 }
 
 void AMDGPUPassConfig::addCodeGenPrepare() {
+  // FIXME: We need to run a pass to propagate the attributes when calls are
+  // supported.
+  if (TM->getTargetTriple().getArch() == Triple::amdgcn)
+    addPass(createAMDGPUAnnotateKernelFeaturesPass());
+
   if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
       EnableLowerKernelArguments)
     addPass(createAMDGPULowerKernelArgumentsPass());
@@ -749,7 +754,3 @@
 
-  // FIXME: We need to run a pass to propagate the attributes when calls are
-  // supported.
-  addPass(createAMDGPUAnnotateKernelFeaturesPass());
-
   // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
   // regions formed by them.