Index: llvm/lib/Target/AMDGPU/AMDGPU.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPU.h +++ llvm/lib/Target/AMDGPU/AMDGPU.h @@ -53,8 +53,6 @@ FunctionPass *createAMDGPUCodeGenPreparePass(); FunctionPass *createAMDGPULateCodeGenPreparePass(); FunctionPass *createAMDGPUMachineCFGStructurizerPass(); -FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *); -ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *); FunctionPass *createAMDGPURewriteOutArgumentsPass(); ModulePass *createAMDGPULowerModuleLDSPass(); FunctionPass *createSIModeRegisterPass(); @@ -116,30 +114,6 @@ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; -void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &); -extern char &AMDGPUPropagateAttributesEarlyID; - -struct AMDGPUPropagateAttributesEarlyPass - : PassInfoMixin { - AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {} - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); - -private: - TargetMachine &TM; -}; - -void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &); -extern char &AMDGPUPropagateAttributesLateID; - -struct AMDGPUPropagateAttributesLatePass - : PassInfoMixin { - AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {} - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - -private: - TargetMachine &TM; -}; - void initializeAMDGPULowerModuleLDSPass(PassRegistry &); extern char &AMDGPULowerModuleLDSID; Index: llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp +++ /dev/null @@ -1,392 +0,0 @@ -//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// \brief This pass propagates attributes from kernels to the non-entry -/// functions. Most of the library functions were not compiled for specific ABI, -/// yet will be correctly compiled if proper attributes are propagated from the -/// caller. -/// -/// The pass analyzes call graph and propagates ABI target features through the -/// call graph. -/// -/// It can run in two modes: as a function or module pass. A function pass -/// simply propagates attributes. A module pass clones functions if there are -/// callers with different ABI. If a function is cloned all call sites will -/// be updated to use a correct clone. -/// -/// A function pass is limited in functionality but can run early in the -/// pipeline. A module pass is more powerful but has to run late, so misses -/// library folding opportunities. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "Utils/AMDGPUBaseInfo.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Utils/Cloning.h" - -#define DEBUG_TYPE "amdgpu-propagate-attributes" - -using namespace llvm; - -namespace llvm { -extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]; -} - -namespace { - -// Target features to propagate. -static constexpr const FeatureBitset TargetFeatures = { - AMDGPU::FeatureWavefrontSize16, - AMDGPU::FeatureWavefrontSize32, - AMDGPU::FeatureWavefrontSize64 -}; - -class AMDGPUPropagateAttributes { - - class FnProperties { - private: - explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {} - - public: - explicit FnProperties(const TargetMachine &TM, const Function &F) { - Features = TM.getSubtargetImpl(F)->getFeatureBits(); - } - - bool operator == (const FnProperties &Other) const { - if ((Features & TargetFeatures) != (Other.Features & TargetFeatures)) - return false; - return true; - } - - FnProperties adjustToCaller(const FnProperties &CallerProps) const { - FnProperties New((Features & ~TargetFeatures) | CallerProps.Features); - return New; - } - - FeatureBitset Features; - }; - - class Clone { - public: - Clone(const FnProperties &Props, Function *OrigF, Function *NewF) : - Properties(Props), OrigF(OrigF), NewF(NewF) {} - - FnProperties Properties; - Function *OrigF; - Function *NewF; - }; - - const TargetMachine *TM; - - // Clone functions as needed or just set attributes. - bool AllowClone; - - // Option propagation roots. - SmallSet Roots; - - // Clones of functions with their attributes. - SmallVector Clones; - - // Find a clone with required features. - Function *findFunction(const FnProperties &PropsNeeded, - Function *OrigF); - - // Clone function \p F and set \p NewProps on the clone. - // Cole takes the name of original function. - Function *cloneWithProperties(Function &F, const FnProperties &NewProps); - - // Set new function's features in place. - void setFeatures(Function &F, const FeatureBitset &NewFeatures); - - std::string getFeatureString(const FeatureBitset &Features) const; - - // Propagate attributes from Roots. - bool process(); - -public: - AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : - TM(TM), AllowClone(AllowClone) {} - - // Use F as a root and propagate its attributes. - bool process(Function &F); - - // Propagate attributes starting from kernel functions. - bool process(Module &M); -}; - -// Allows to propagate attributes early, but no cloning is allowed as it must -// be a function pass to run before any optimizations. -// TODO: We shall only need a one instance of module pass, but that needs to be -// in the linker pipeline which is currently not possible. -class AMDGPUPropagateAttributesEarly : public FunctionPass { - const TargetMachine *TM; - -public: - static char ID; // Pass identification - - AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : - FunctionPass(ID), TM(TM) { - initializeAMDGPUPropagateAttributesEarlyPass( - *PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; -}; - -// Allows to propagate attributes with cloning but does that late in the -// pipeline. -class AMDGPUPropagateAttributesLate : public ModulePass { - const TargetMachine *TM; - -public: - static char ID; // Pass identification - - AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : - ModulePass(ID), TM(TM) { - initializeAMDGPUPropagateAttributesLatePass( - *PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override; -}; - -} // end anonymous namespace. - -char AMDGPUPropagateAttributesEarly::ID = 0; -char AMDGPUPropagateAttributesLate::ID = 0; - -INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, - "amdgpu-propagate-attributes-early", - "Early propagate attributes from kernels to functions", - false, false) -INITIALIZE_PASS(AMDGPUPropagateAttributesLate, - "amdgpu-propagate-attributes-late", - "Late propagate attributes from kernels to functions", - false, false) - -Function * -AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded, - Function *OrigF) { - // TODO: search for clone's clones. - for (Clone &C : Clones) - if (C.OrigF == OrigF && PropsNeeded == C.Properties) - return C.NewF; - - return nullptr; -} - -bool AMDGPUPropagateAttributes::process(Module &M) { - for (auto &F : M.functions()) - if (AMDGPU::isKernel(F.getCallingConv())) - Roots.insert(&F); - - return Roots.empty() ? false : process(); -} - -bool AMDGPUPropagateAttributes::process(Function &F) { - Roots.insert(&F); - return process(); -} - -bool AMDGPUPropagateAttributes::process() { - bool Changed = false; - SmallSet NewRoots; - SmallSet Replaced; - - assert(!Roots.empty()); - Module &M = *(*Roots.begin())->getParent(); - - do { - Roots.insert(NewRoots.begin(), NewRoots.end()); - NewRoots.clear(); - - for (auto &F : M.functions()) { - if (F.isDeclaration()) - continue; - - const FnProperties CalleeProps(*TM, F); - SmallVector, 32> ToReplace; - SmallSet Visited; - - for (User *U : F.users()) { - Instruction *I = dyn_cast(U); - if (!I) - continue; - CallBase *CI = dyn_cast(I); - // Only propagate attributes if F is the called function. Specifically, - // do not propagate attributes if F is passed as an argument. - // FIXME: handle bitcasted callee, e.g. - // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)() - if (!CI || CI->getCalledOperand() != &F) - continue; - Function *Caller = CI->getCaller(); - if (!Caller || !Visited.insert(CI).second) - continue; - if (!Roots.count(Caller) && !NewRoots.count(Caller)) - continue; - - const FnProperties CallerProps(*TM, *Caller); - - if (CalleeProps == CallerProps) { - if (!Roots.count(&F)) - NewRoots.insert(&F); - continue; - } - - Function *NewF = findFunction(CallerProps, &F); - if (!NewF) { - const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps); - if (!AllowClone) { - // This may set different features on different iterations if - // there is a contradiction in callers' attributes. In this case - // we rely on a second pass running on Module, which is allowed - // to clone. - setFeatures(F, NewProps.Features); - NewRoots.insert(&F); - Changed = true; - break; - } - - NewF = cloneWithProperties(F, NewProps); - Clones.push_back(Clone(CallerProps, &F, NewF)); - NewRoots.insert(NewF); - } - - ToReplace.push_back(std::pair(CI, NewF)); - Replaced.insert(&F); - - Changed = true; - } - - while (!ToReplace.empty()) { - auto R = ToReplace.pop_back_val(); - R.first->setCalledFunction(R.second); - } - } - } while (!NewRoots.empty()); - - for (Function *F : Replaced) { - if (F->use_empty()) - F->eraseFromParent(); - } - - Roots.clear(); - Clones.clear(); - - return Changed; -} - -Function * -AMDGPUPropagateAttributes::cloneWithProperties(Function &F, - const FnProperties &NewProps) { - LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); - - ValueToValueMapTy dummy; - Function *NewF = CloneFunction(&F, dummy); - setFeatures(*NewF, NewProps.Features); - NewF->setVisibility(GlobalValue::DefaultVisibility); - NewF->setLinkage(GlobalValue::InternalLinkage); - - // Swap names. If that is the only clone it will retain the name of now - // dead value. Preserve original name for externally visible functions. - if (F.hasName() && F.hasLocalLinkage()) { - std::string NewName = std::string(NewF->getName()); - NewF->takeName(&F); - F.setName(NewName); - } - - return NewF; -} - -void AMDGPUPropagateAttributes::setFeatures(Function &F, - const FeatureBitset &NewFeatures) { - std::string NewFeatureStr = getFeatureString(NewFeatures); - - LLVM_DEBUG(dbgs() << "Set features " - << getFeatureString(NewFeatures & TargetFeatures) - << " on " << F.getName() << '\n'); - - F.removeFnAttr("target-features"); - F.addFnAttr("target-features", NewFeatureStr); -} - -std::string -AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const -{ - std::string Ret; - for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { - if (Features[KV.Value]) - Ret += (StringRef("+") + KV.Key + ",").str(); - else if (TargetFeatures[KV.Value]) - Ret += (StringRef("-") + KV.Key + ",").str(); - } - Ret.pop_back(); // Remove last comma. - return Ret; -} - -bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { - if (!TM) { - auto *TPC = getAnalysisIfAvailable(); - if (!TPC) - return false; - - TM = &TPC->getTM(); - } - - if (!AMDGPU::isKernel(F.getCallingConv())) - return false; - - return AMDGPUPropagateAttributes(TM, false).process(F); -} - -bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { - if (!TM) { - auto *TPC = getAnalysisIfAvailable(); - if (!TPC) - return false; - - TM = &TPC->getTM(); - } - - return AMDGPUPropagateAttributes(TM, true).process(M); -} - -FunctionPass -*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { - return new AMDGPUPropagateAttributesEarly(TM); -} - -ModulePass -*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { - return new AMDGPUPropagateAttributesLate(TM); -} - -PreservedAnalyses -AMDGPUPropagateAttributesEarlyPass::run(Function &F, - FunctionAnalysisManager &AM) { - if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) - return PreservedAnalyses::all(); - - return AMDGPUPropagateAttributes(&TM, false).process(F) - ? PreservedAnalyses::none() - : PreservedAnalyses::all(); -} - -PreservedAnalyses -AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) { - return AMDGPUPropagateAttributes(&TM, true).process(M) - ? PreservedAnalyses::none() - : PreservedAnalyses::all(); -} Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -397,8 +397,6 @@ initializeAMDGPUPromoteAllocaToVectorPass(*PR); initializeAMDGPUCodeGenPreparePass(*PR); initializeAMDGPULateCodeGenPreparePass(*PR); - initializeAMDGPUPropagateAttributesEarlyPass(*PR); - initializeAMDGPUPropagateAttributesLatePass(*PR); initializeAMDGPURemoveIncompatibleFunctionsPass(*PR); initializeAMDGPULowerModuleLDSPass(*PR); initializeAMDGPURewriteOutArgumentsPass(*PR); @@ -608,12 +606,8 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PB.registerPipelineParsingCallback( - [this](StringRef PassName, ModulePassManager &PM, - ArrayRef) { - if (PassName == "amdgpu-propagate-attributes-late") { - PM.addPass(AMDGPUPropagateAttributesLatePass(*this)); - return true; - } + [](StringRef PassName, ModulePassManager &PM, + ArrayRef) { if (PassName == "amdgpu-unify-metadata") { PM.addPass(AMDGPUUnifyMetadataPass()); return true; @@ -659,10 +653,6 @@ PM.addPass(AMDGPULowerKernelAttributesPass()); return true; } - if (PassName == "amdgpu-propagate-attributes-early") { - PM.addPass(AMDGPUPropagateAttributesEarlyPass(*this)); - return true; - } if (PassName == "amdgpu-promote-kernel-arguments") { PM.addPass(AMDGPUPromoteKernelArgumentsPass()); return true; @@ -698,7 +688,6 @@ PB.registerPipelineStartEPCallback( [this](ModulePassManager &PM, OptimizationLevel Level) { FunctionPassManager FPM; - FPM.addPass(AMDGPUPropagateAttributesEarlyPass(*this)); FPM.addPass(AMDGPUUseNativeCallsPass()); if (EnableLibCallSimplify && Level != OptimizationLevel::O0) FPM.addPass(AMDGPUSimplifyLibCallsPass(*this)); @@ -706,7 +695,7 @@ }); PB.registerPipelineEarlySimplificationEPCallback( - [this](ModulePassManager &PM, OptimizationLevel Level) { + [](ModulePassManager &PM, OptimizationLevel Level) { PM.addPass(AMDGPUPrintfRuntimeBindingPass()); if (Level == OptimizationLevel::O0) @@ -716,11 +705,9 @@ if (InternalizeSymbols) { PM.addPass(InternalizePass(mustPreserveGV)); - } - PM.addPass(AMDGPUPropagateAttributesLatePass(*this)); - if (InternalizeSymbols) { PM.addPass(GlobalDCEPass()); } + if (EarlyInlineAll && !EnableFunctionCalls) PM.addPass(AMDGPUAlwaysInlinePass()); }); @@ -992,9 +979,6 @@ if (LowerCtorDtor) addPass(createAMDGPUCtorDtorLoweringLegacyPass()); - // A call to propagate attributes pass in the backend in case opt was not run. - addPass(createAMDGPUPropagateAttributesEarlyPass(&TM)); - // Function calls are not supported, so make sure we inline everything. addPass(createAMDGPUAlwaysInlinePass()); addPass(createAlwaysInlinerLegacyPass()); Index: llvm/lib/Target/AMDGPU/CMakeLists.txt =================================================================== --- llvm/lib/Target/AMDGPU/CMakeLists.txt +++ llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -83,7 +83,6 @@ AMDGPUPreLegalizerCombiner.cpp AMDGPUPrintfRuntimeBinding.cpp AMDGPUPromoteAlloca.cpp - AMDGPUPropagateAttributes.cpp AMDGPUPromoteKernelArguments.cpp AMDGPURegBankCombiner.cpp AMDGPURegBankSelect.cpp Index: llvm/test/CodeGen/AMDGPU/llc-pipeline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -32,8 +32,6 @@ ; GCN-O0-NEXT: FunctionPass Manager ; GCN-O0-NEXT: Dominator Tree Construction ; GCN-O0-NEXT: Lower ctors and dtors for AMDGPU -; GCN-O0-NEXT: FunctionPass Manager -; GCN-O0-NEXT: Early propagate attributes from kernels to functions ; GCN-O0-NEXT: AMDGPU Inline All Functions ; GCN-O0-NEXT: Inliner for always_inline functions ; GCN-O0-NEXT: FunctionPass Manager @@ -179,8 +177,6 @@ ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Dominator Tree Construction ; GCN-O1-NEXT: Lower ctors and dtors for AMDGPU -; GCN-O1-NEXT: FunctionPass Manager -; GCN-O1-NEXT: Early propagate attributes from kernels to functions ; GCN-O1-NEXT: AMDGPU Inline All Functions ; GCN-O1-NEXT: Inliner for always_inline functions ; GCN-O1-NEXT: FunctionPass Manager @@ -457,8 +453,6 @@ ; GCN-O1-OPTS-NEXT: FunctionPass Manager ; GCN-O1-OPTS-NEXT: Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Lower ctors and dtors for AMDGPU -; GCN-O1-OPTS-NEXT: FunctionPass Manager -; GCN-O1-OPTS-NEXT: Early propagate attributes from kernels to functions ; GCN-O1-OPTS-NEXT: AMDGPU Inline All Functions ; GCN-O1-OPTS-NEXT: Inliner for always_inline functions ; GCN-O1-OPTS-NEXT: FunctionPass Manager @@ -758,8 +752,6 @@ ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Dominator Tree Construction ; GCN-O2-NEXT: Lower ctors and dtors for AMDGPU -; GCN-O2-NEXT: FunctionPass Manager -; GCN-O2-NEXT: Early propagate attributes from kernels to functions ; GCN-O2-NEXT: AMDGPU Inline All Functions ; GCN-O2-NEXT: Inliner for always_inline functions ; GCN-O2-NEXT: FunctionPass Manager @@ -1069,8 +1061,6 @@ ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Dominator Tree Construction ; GCN-O3-NEXT: Lower ctors and dtors for AMDGPU -; GCN-O3-NEXT: FunctionPass Manager -; GCN-O3-NEXT: Early propagate attributes from kernels to functions ; GCN-O3-NEXT: AMDGPU Inline All Functions ; GCN-O3-NEXT: Inliner for always_inline functions ; GCN-O3-NEXT: FunctionPass Manager Index: llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll +++ /dev/null @@ -1,145 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -O1 < %s | FileCheck -check-prefixes=OPT,OPT-EXT %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='default' < %s | FileCheck -check-prefixes=OPT,OPT-EXT %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -O1 --amdgpu-internalize-symbols < %s | FileCheck -check-prefixes=OPT,OPT-INT %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='default' --amdgpu-internalize-symbols < %s | FileCheck -check-prefixes=OPT,OPT-INT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=LLC %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=LLC %s - -; OPT: declare void @foo4() local_unnamed_addr #0 -; OPT: define internal fastcc void @0() unnamed_addr #1 -; OPT-EXT: define void @foo3() local_unnamed_addr #1 -; OPT-INT: define internal fastcc void @foo3.2() unnamed_addr #1 -; OPT-EXT: define void @foo2() local_unnamed_addr #1 -; OPT-INT: define internal fastcc void @foo2.3() unnamed_addr #1 -; OPT-EXT: define void @foo1() local_unnamed_addr #1 -; OPT-EXT: tail call void @foo4() -; OPT-EXT: tail call void @foo3() -; OPT-EXT: tail call void @foo2() -; OPT-EXT: tail call void @foo2() -; OPT-EXT: tail call void @foo1() -; OPT-EXT: tail call fastcc void @0() -; OPT-INT: define internal fastcc void @foo1.1() unnamed_addr #1 -; OPT-INT: tail call void @foo4() -; OPT-INT: tail call fastcc void @foo3.2() -; OPT-INT: tail call fastcc void @foo2.3() -; OPT-INT: tail call fastcc void @foo2.3() -; OPT-INT: tail call fastcc void @foo1.1() -; OPT-INT: tail call fastcc void @0() -; OPT: ret void -; OPT: define amdgpu_kernel void @kernel1() local_unnamed_addr #2 -; OPT-EXT: tail call fastcc void @foo1.1() -; OPT-INT: tail call fastcc void @foo1() -; OPT: ret void -; OPT: define amdgpu_kernel void @kernel2() local_unnamed_addr #3 -; OPT-EXT: tail call void @foo2() -; OPT-INT: tail call fastcc void @foo2.3() -; OPT: ret void -; OPT: define amdgpu_kernel void @kernel3() local_unnamed_addr #3 -; OPT-EXT: tail call void @foo1() -; OPT-INT: tail call fastcc void @foo1.1() -; OPT: ret void -; OPT-EXT: define internal fastcc void @foo1.1() unnamed_addr #4 -; OPT-EXT: tail call void @foo4() -; OPT-EXT: tail call fastcc void @foo3.2() -; OPT-EXT: tail call fastcc void @foo2.3() -; OPT-EXT: tail call fastcc void @foo2.3() -; OPT-EXT: tail call fastcc void @foo1.1() -; OPT-EXT: tail call fastcc void @1() -; OPT-INT: define internal fastcc void @foo1() unnamed_addr #4 -; OPT-INT: tail call void @foo4() -; OPT-INT: tail call fastcc void @foo3() -; OPT-INT: tail call fastcc void @foo2() -; OPT-INT: tail call fastcc void @foo2() -; OPT-INT: tail call fastcc void @foo1() -; OPT-INT: tail call fastcc void @1() -; OPT: ret void -; OPT: define internal fastcc void @1() unnamed_addr #4 -; OPT-EXT: define internal fastcc void @foo3.2() unnamed_addr #4 -; OPT-INT: define internal fastcc void @foo3() unnamed_addr #4 -; OPT-EXT: define internal fastcc void @foo2.3() unnamed_addr #4 -; OPT-INT: define internal fastcc void @foo2() unnamed_addr #4 -; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" } -; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,-wavefrontsize32,+wavefrontsize64{{.*}}" } -; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32" } -; OPT: attributes #3 = { {{.*}} "target-features"="+wavefrontsize64" } -; OPT: attributes #4 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64{{.*}}" } - -; LLC: foo3: -; LLC: sample asm -; LLC: foo2: -; LLC: sample asm -; LLC: foo1: -; LLC: foo4@gotpcrel32@lo+4 -; LLC: foo4@gotpcrel32@hi+12 -; LLC: foo3@gotpcrel32@lo+4 -; LLC: foo3@gotpcrel32@hi+12 -; LLC: foo2@gotpcrel32@lo+4 -; LLC: foo2@gotpcrel32@hi+12 -; LLC: foo1@gotpcrel32@lo+4 -; LLC: foo1@gotpcrel32@hi+12 -; LLC: __unnamed_1@gotpcrel32@lo+4 -; LLC: __unnamed_1@gotpcrel32@hi+12 -; LLC: kernel1: -; LLC: foo1@gotpcrel32@lo+4 -; LLC: foo1@gotpcrel32@hi+12 -; LLC: kernel2: -; LLC: foo2@gotpcrel32@lo+4 -; LLC: foo2@gotpcrel32@hi+12 -; LLC: kernel3: -; LLC: foo1@gotpcrel32@lo+4 -; LLC: foo1@gotpcrel32@hi+12 - -declare void @foo4() #1 - -define void @0() #1 { -entry: - call void asm sideeffect "; sample asm", ""() - ret void -} - -define void @foo3() #4 { -entry: - call void asm sideeffect "; sample asm", ""() - ret void -} - -define void @foo2() #1 { -entry: - call void asm sideeffect "; sample asm", ""() - ret void -} - -define void @foo1() #1 { -entry: - tail call void @foo4() - tail call void @foo3() - tail call void @foo2() - tail call void @foo2() - tail call void @foo1() - tail call void @0() - ret void -} - -define amdgpu_kernel void @kernel1() #0 { -entry: - tail call void @foo1() - ret void -} - -define amdgpu_kernel void @kernel2() #2 { -entry: - tail call void @foo2() - ret void -} - -define amdgpu_kernel void @kernel3() #3 { -entry: - tail call void @foo1() - ret void -} - -attributes #0 = { nounwind "target-features"="+wavefrontsize32" } -attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" } -attributes #2 = { nounwind "target-features"="+wavefrontsize64" } -attributes #3 = { nounwind "target-features"="+wavefrontsize64" } -attributes #4 = { noinline nounwind "target-features"="+wavefrontsize64" } Index: llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll +++ /dev/null @@ -1,40 +0,0 @@ -; This is a regression test for a bug in the AMDGPU Propagate Attributes pass -; where a call instruction's callee could be replaced with a function pointer -; passed to the original call instruction as an argument. -; -; Example: -; `call void @f(ptr @g)` -; could become -; `call void @g(ptr @g.1)` -; which is invalid IR. - -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-propagate-attributes-late %s | FileCheck %s - -; CHECK-LABEL: define amdgpu_kernel void @thiswasabug() #0 -; CHECK-NOT: call void @g(ptr @g.1) -; CHECK-DAG: call void @f(ptr @g.1) -; CHECK-DAG: call void @g() -define amdgpu_kernel void @thiswasabug() #0 { - ; no replacement, but @g should be renamed to @g.1 - call void @f(ptr @g) - - ; this should call the clone, which takes the name @g - call void @g() - ret void -} - -define private void @f(ptr nocapture %0) #0 { - ret void -} - -; In order to expose this bug, it is necessary that `g` have one of the -; propagated attributes, so that a clone and substitution would take place if g -; were actually the function being called. -; CHECK-DAG: define private void @g.1() #1 -; CHECK-DAG: define internal void @g() #2 -define private void @g() #1 { - ret void -} - -attributes #0 = { noinline } -attributes #1 = { noinline "target-features"="+wavefrontsize32" } Index: llvm/test/CodeGen/AMDGPU/propagate-attributes-single-set.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/propagate-attributes-single-set.ll +++ /dev/null @@ -1,74 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -O1 < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='default' < %s | FileCheck -check-prefix=OPT %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=LLC %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=LLC %s - -; OPT: declare void @foo4() local_unnamed_addr #0 -; OPT: define void @foo3() local_unnamed_addr #1 -; OPT: define void @foo2() local_unnamed_addr #1 -; OPT: define void @foo1() local_unnamed_addr #1 -; OPT: define amdgpu_kernel void @kernel1() local_unnamed_addr #2 -; OPT: define amdgpu_kernel void @kernel2() local_unnamed_addr #2 -; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" } -; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64 -; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32 -; OPT: attributes #3 = { nounwind } - -; LLC: foo3: -; LLC: sample asm -; LLC: foo2: -; LLC: sample asm -; LLC: foo1: -; LLC: foo4@gotpcrel32@lo+4 -; LLC: foo4@gotpcrel32@hi+12 -; LLC: foo3@gotpcrel32@lo+4 -; LLC: foo3@gotpcrel32@hi+12 -; LLC: foo2@gotpcrel32@lo+4 -; LLC: foo2@gotpcrel32@hi+12 -; LLC: foo1@gotpcrel32@lo+4 -; LLC: foo1@gotpcrel32@hi+12 -; LLC: kernel1: -; LLC: foo1@gotpcrel32@lo+4 -; LLC: foo1@gotpcrel32@hi+12 -; LLC: kernel2: -; LLC: foo2@gotpcrel32@lo+4 -; LLC: foo2@gotpcrel32@hi+12 - -declare void @foo4() #1 - -define void @foo3() #1 { -entry: - call void asm sideeffect "; sample asm", ""() - ret void -} - -define void @foo2() #1 { -entry: - call void asm sideeffect "; sample asm", ""() - ret void -} - -define void @foo1() #1 { -entry: - tail call void @foo4() - tail call void @foo3() - tail call void @foo2() - tail call void @foo2() - tail call void @foo1() - ret void -} - -define amdgpu_kernel void @kernel1() #0 { -entry: - tail call void @foo1() - ret void -} - -define amdgpu_kernel void @kernel2() #0 { -entry: - tail call void @foo2() - ret void -} - -attributes #0 = { nounwind "target-features"="+wavefrontsize32" } -attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" } Index: llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn +++ llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn @@ -169,7 +169,6 @@ "AMDGPUPrintfRuntimeBinding.cpp", "AMDGPUPromoteAlloca.cpp", "AMDGPUPromoteKernelArguments.cpp", - "AMDGPUPropagateAttributes.cpp", "AMDGPURegBankCombiner.cpp", "AMDGPURegBankSelect.cpp", "AMDGPURegisterBankInfo.cpp",