Index: llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp @@ -48,19 +48,62 @@ namespace { +// Target features to propagate. +static constexpr const FeatureBitset TargetFeatures = { + AMDGPU::FeatureWavefrontSize16, + AMDGPU::FeatureWavefrontSize32, + AMDGPU::FeatureWavefrontSize64 +}; + +// Attributes to propagate. +static constexpr const char* AttributeNames[] = { + "amdgpu-waves-per-eu" +}; + +static constexpr unsigned NumAttr = + sizeof(AttributeNames) / sizeof(AttributeNames[0]); + class AMDGPUPropagateAttributes { - const FeatureBitset TargetFeatures = { - AMDGPU::FeatureWavefrontSize16, - AMDGPU::FeatureWavefrontSize32, - AMDGPU::FeatureWavefrontSize64 + + class FnProperties { + private: + explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {} + + public: + explicit FnProperties(const TargetMachine &TM, const Function &F) { + Features = TM.getSubtargetImpl(F)->getFeatureBits(); + + for (unsigned I = 0; I < NumAttr; ++I) + if (F.hasFnAttribute(AttributeNames[I])) + Attributes[I] = F.getFnAttribute(AttributeNames[I]); + } + + bool operator == (const FnProperties &Other) const { + if ((Features & TargetFeatures) != (Other.Features & TargetFeatures)) + return false; + for (unsigned I = 0; I < NumAttr; ++I) + if (Attributes[I] != Other.Attributes[I]) + return false; + return true; + } + + FnProperties adjustToCaller(const FnProperties &CallerProps) const { + FnProperties New((Features & ~TargetFeatures) | CallerProps.Features); + for (unsigned I = 0; I < NumAttr; ++I) + New.Attributes[I] = CallerProps.Attributes[I]; + return New; + } + + FeatureBitset Features; + Optional Attributes[NumAttr]; }; - class Clone{ + class Clone { public: - Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) : - FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {} + Clone(const 
FnProperties &Props, Function *OrigF, Function *NewF) : + Properties(Props), OrigF(OrigF), NewF(NewF) {} - FeatureBitset FeatureMask; + FnProperties Properties; Function *OrigF; Function *NewF; }; @@ -77,17 +120,19 @@ SmallVector Clones; // Find a clone with required features. - Function *findFunction(const FeatureBitset &FeaturesNeeded, + Function *findFunction(const FnProperties &PropsNeeded, Function *OrigF); - // Clone function F and set NewFeatures on the clone. + // Clone function \p F and set \p NewProps on the clone. // Clone takes the name of original function. - Function *cloneWithFeatures(Function &F, - const FeatureBitset &NewFeatures); + Function *cloneWithProperties(Function &F, const FnProperties &NewProps); // Set new function's features in place. void setFeatures(Function &F, const FeatureBitset &NewFeatures); + // Set new function's attributes in place. + void setAttributes(Function &F, const ArrayRef> NewAttrs); + std::string getFeatureString(const FeatureBitset &Features) const; // Propagate attributes from Roots. @@ -155,11 +200,11 @@ false, false) Function * -AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded, +AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded, Function *OrigF) { // TODO: search for clone's clones. 
for (Clone &C : Clones) - if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask) + if (C.OrigF == OrigF && PropsNeeded == C.Properties) return C.NewF; return nullptr; @@ -195,8 +240,7 @@ if (F.isDeclaration()) continue; - const FeatureBitset &CalleeBits = - TM->getSubtargetImpl(F)->getFeatureBits(); + const FnProperties CalleeProps(*TM, F); SmallVector, 32> ToReplace; SmallSet Visited; @@ -213,32 +257,31 @@ if (!Roots.count(Caller) && !NewRoots.count(Caller)) continue; - const FeatureBitset &CallerBits = - TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures; + const FnProperties CallerProps(*TM, *Caller); - if (CallerBits == (CalleeBits & TargetFeatures)) { + if (CalleeProps == CallerProps) { if (!Roots.count(&F)) NewRoots.insert(&F); continue; } - Function *NewF = findFunction(CallerBits, &F); + Function *NewF = findFunction(CallerProps, &F); if (!NewF) { - FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) | - CallerBits); + const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps); if (!AllowClone) { // This may set different features on different iterations if // there is a contradiction in callers' attributes. In this case // we rely on a second pass running on Module, which is allowed // to clone. 
- setFeatures(F, NewFeatures); + setFeatures(F, NewProps.Features); + setAttributes(F, NewProps.Attributes); NewRoots.insert(&F); Changed = true; break; } - NewF = cloneWithFeatures(F, NewFeatures); - Clones.push_back(Clone(CallerBits, &F, NewF)); + NewF = cloneWithProperties(F, NewProps); + Clones.push_back(Clone(CallerProps, &F, NewF)); NewRoots.insert(NewF); } @@ -267,13 +310,14 @@ } Function * -AMDGPUPropagateAttributes::cloneWithFeatures(Function &F, - const FeatureBitset &NewFeatures) { +AMDGPUPropagateAttributes::cloneWithProperties(Function &F, + const FnProperties &NewProps) { LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); ValueToValueMapTy dummy; Function *NewF = CloneFunction(&F, dummy); - setFeatures(*NewF, NewFeatures); + setFeatures(*NewF, NewProps.Features); + setAttributes(*NewF, NewProps.Attributes); NewF->setVisibility(GlobalValue::DefaultVisibility); NewF->setLinkage(GlobalValue::InternalLinkage); @@ -300,6 +344,18 @@ F.addFnAttr("target-features", NewFeatureStr); } +void AMDGPUPropagateAttributes::setAttributes(Function &F, + const ArrayRef> NewAttrs) { + LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n"); + for (unsigned I = 0; I < NumAttr; ++I) { + F.removeFnAttr(AttributeNames[I]); + if (NewAttrs[I]) { + LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n'); + F.addFnAttr(*NewAttrs[I]); + } + } +} + std::string AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const { Index: llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll +++ llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll @@ -55,11 +55,11 @@ ; OPT-INT: define internal fastcc void @foo3() unnamed_addr #4 ; OPT-EXT: define internal fastcc void @foo2.3() unnamed_addr #4 ; OPT-INT: define internal fastcc void @foo2() unnamed_addr #4 -; OPT: attributes #0 = { {{.*}} 
"target-features"="+wavefrontsize64" } +; OPT: attributes #0 = { {{.*}} "amdgpu-waves-per-eu"="1,1" "target-features"="+wavefrontsize64" } ; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,-wavefrontsize32,+wavefrontsize64{{.*}}" } -; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32" } +; OPT: attributes #2 = { {{.*}} "amdgpu-waves-per-eu"="2,4" "target-features"="+wavefrontsize32" } ; OPT: attributes #3 = { {{.*}} "target-features"="+wavefrontsize64" } -; OPT: attributes #4 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64{{.*}}" } +; OPT: attributes #4 = { {{.*}} "amdgpu-waves-per-eu"="2,4" "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64{{.*}}" } ; LLC: foo3: ; LLC: sample asm @@ -94,7 +94,7 @@ ret void } -define void @foo3() #1 { +define void @foo3() #4 { entry: call void asm sideeffect "; sample asm", ""() ret void @@ -135,7 +135,8 @@ ret void } -attributes #0 = { nounwind "target-features"="+wavefrontsize32" } -attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" } +attributes #0 = { nounwind "target-features"="+wavefrontsize32" "amdgpu-waves-per-eu"="2,4" } +attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" "amdgpu-waves-per-eu"="1,1" } attributes #2 = { nounwind "target-features"="+wavefrontsize64" } attributes #3 = { nounwind "target-features"="+wavefrontsize64" } +attributes #4 = { noinline nounwind "target-features"="+wavefrontsize64" "amdgpu-waves-per-eu"="2,4" }