Diff 349773

llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp

Show All 24 Lines
/// library folding opportunities.		/// library folding opportunities.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "AMDGPU.h"		#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"		#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"		#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"		#include "llvm/ADT/SmallSet.h"
		#include "llvm/Analysis/CallGraph.h"
#include "llvm/CodeGen/TargetPassConfig.h"		#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"		#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/InstrTypes.h"		#include "llvm/IR/InstrTypes.h"
#include "llvm/Target/TargetMachine.h"		#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Cloning.h"		#include "llvm/Transforms/Utils/Cloning.h"
		#include <queue>

#define DEBUG_TYPE "amdgpu-propagate-attributes"		#define DEBUG_TYPE "amdgpu-propagate-attributes"
		rampitecUnsubmitted Done Reply Inline Actions Restore blank line here. rampitec: Restore blank line here.

using namespace llvm;		using namespace llvm;

namespace llvm {		namespace llvm {
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];		extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
}		}

namespace {		namespace {
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	public:
Function *NewF;		Function *NewF;
};		};

const TargetMachine *TM;		const TargetMachine *TM;

// Clone functions as needed or just set attributes.		// Clone functions as needed or just set attributes.
bool AllowClone;		bool AllowClone;

		CallGraph *ModuleCG = nullptr;
		foadUnsubmitted Done Reply Inline Actions Add the nullptr initialization here instead of in the constructor? foad: Add the nullptr initialization here instead of in the constructor?
		madhur13490AuthorUnsubmitted Done Reply Inline Actions May I know why? madhur13490: May I know why?
		foadUnsubmitted Done Reply Inline Actions It's slightly better style because if you have more than one constructor, you don't have to repeat the same initialization value for every constructor. It doesn't really matter if you only have one constructor. foad: It's slightly better style because if you have more than one constructor, you don't have to…

// Option propagation roots.		// Option propagation roots.
SmallSet<Function *, 32> Roots;		SmallSet<Function *, 32> Roots;

// Clones of functions with their attributes.		// Clones of functions with their attributes.
SmallVector<Clone, 32> Clones;		SmallVector<Clone, 32> Clones;

		// To memoize address taken functions.
		SmallSet<Function *, 32> AddressTakenFunctions;

// Find a clone with required features.		// Find a clone with required features.
Function *findFunction(const FnProperties &PropsNeeded,		Function *findFunction(const FnProperties &PropsNeeded,
Function *OrigF);		Function *OrigF);

// Clone function \p F and set \p NewProps on the clone.		// Clone function \p F and set \p NewProps on the clone.
// Clone takes the name of the original function.		// Clone takes the name of the original function.
Function *cloneWithProperties(Function &F, const FnProperties &NewProps);		Function *cloneWithProperties(Function &F, const FnProperties &NewProps);

// Set new function's features in place.		// Set new function's features in place.
void setFeatures(Function &F, const FeatureBitset &NewFeatures);		void setFeatures(Function &F, const FeatureBitset &NewFeatures);

// Set new function's attributes in place.		// Set new function's attributes in place.
void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);		void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);

std::string getFeatureString(const FeatureBitset &Features) const;		std::string getFeatureString(const FeatureBitset &Features) const;

// Propagate attributes from Roots.		// Propagate attributes from Roots.
bool process();		bool process();

public:		public:
AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :		AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone)
TM(TM), AllowClone(AllowClone) {}		: TM(TM), AllowClone(AllowClone) {}

// Use F as a root and propagate its attributes.		// Use F as a root and propagate its attributes.
bool process(Function &F);		bool process(Function &F);

// Propagate attributes starting from kernel functions.		// Propagate attributes starting from kernel functions.
bool process(Module &M);		bool process(Module &M, CallGraph *CG);

		// Remove attributes from F.
		// This is used in presence of address taken functions.
		bool removeAttributes(Function *F);

		// Handle call graph rooted at address taken functions.
		// This function will erase all attributes present
		// on all functions called from address taken functions transitively.
		bool handleAddressTakenFunctions(CallGraph *CG);
};		};

// Allows attributes to be propagated early, but no cloning is allowed as it must		// Allows attributes to be propagated early, but no cloning is allowed as it must
// be a function pass to run before any optimizations.		// be a function pass to run before any optimizations.
// TODO: We should only need one instance of the module pass, but that needs to be		// TODO: We should only need one instance of the module pass, but that needs to be
// in the linker pipeline which is currently not possible.		// in the linker pipeline which is currently not possible.
class AMDGPUPropagateAttributesEarly : public FunctionPass {		class AMDGPUPropagateAttributesEarly : public FunctionPass {
const TargetMachine *TM;		const TargetMachine *TM;
Show All 19 Lines	public:
static char ID; // Pass identification		static char ID; // Pass identification

AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :		AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
ModulePass(ID), TM(TM) {		ModulePass(ID), TM(TM) {
initializeAMDGPUPropagateAttributesLatePass(		initializeAMDGPUPropagateAttributesLatePass(
*PassRegistry::getPassRegistry());		*PassRegistry::getPassRegistry());
}		}

		void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnModule(Module &M) override;		bool runOnModule(Module &M) override;
};		};

} // end anonymous namespace.		} // end anonymous namespace.

char AMDGPUPropagateAttributesEarly::ID = 0;		char AMDGPUPropagateAttributesEarly::ID = 0;
char AMDGPUPropagateAttributesLate::ID = 0;		char AMDGPUPropagateAttributesLate::ID = 0;

INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,		INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
"amdgpu-propagate-attributes-early",		"amdgpu-propagate-attributes-early",
"Early propagate attributes from kernels to functions",		"Early propagate attributes from kernels to functions",
false, false)		false, false)
INITIALIZE_PASS(AMDGPUPropagateAttributesLate,		INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
"amdgpu-propagate-attributes-late",		"amdgpu-propagate-attributes-late",
"Late propagate attributes from kernels to functions",		"Late propagate attributes from kernels to functions",
false, false)		false, false)

		bool AMDGPUPropagateAttributes::removeAttributes(Function *F) {
		bool Changed = false;
		if (!F)
		arsenmUnsubmitted Not Done Reply Inline Actions I don't think null F can reach here arsenm: I don't think null F can reach here
		madhur13490AuthorUnsubmitted Done Reply Inline Actions One of the test cases from AOMP reached here with null F. madhur13490: One of the test cases from AOMP reached here with null F.
		return Changed;
		LLVM_DEBUG(dbgs() << "Removing attributes from " << F->getName() << '\n');
		rampitecUnsubmitted Done Reply Inline Actions \n in single quotes. rampitec: \n in single quotes.
		for (unsigned I = 0; I < NumAttr; ++I) {
		if (F->hasFnAttribute(AttributeNames[I])) {
		F->removeFnAttr(AttributeNames[I]);
		Changed = true;
		}
		}
		return Changed;
		}

		bool AMDGPUPropagateAttributes::handleAddressTakenFunctions(CallGraph *CG) {
		assert(ModuleCG && "Call graph not present");

		bool Changed = false;
		SmallSet<CallGraphNode *, 32> Visited;

		arsenmUnsubmitted Done Reply Inline Actions Just assert, the return is dead arsenm: Just assert, the return is dead
		madhur13490AuthorUnsubmitted Done Reply Inline Actions I'd say it's a style choice. Preferring llvm_unreachable over assert. madhur13490: I'd say it's a style choice. Preferring llvm_unreachable over assert.
		foadUnsubmitted Not Done Reply Inline Actions Then it's a bad choice :) I agree with Matt. `assert(CG)` or `assert(CG && "message")` are shorter and simpler and more familiar because they're already ubiquitous throughout llvm. foad: Then it's a bad choice :) I agree with Matt. `assert(CG)` or `assert(CG && "message")` are…
		madhur13490AuthorUnsubmitted Done Reply Inline Actions As per coding standards, llvm_unreachable seems more preferred. https://llvm.org/docs/CodingStandards.html#assert-liberally @rampitec What do you think? Internally, I changed this from assert. madhur13490: As per coding standards, llvm_unreachable seems more preferred. https://llvm.
		rampitecUnsubmitted Not Done Reply Inline Actions Internally it was "if() assert(false ...)". assert(false) shall never be used, that's why I suggested llvm_unreachable() instead. But "assert(ModuleCG);" is a more common thing. rampitec: Internally it was "if() assert(false ...)". assert(false) shall never be used, that's why I…
		madhur13490AuthorUnsubmitted Done Reply Inline Actions There are ways to implement this: A. if (!ModuleCG) { assert(false, "ModuleCG not present"); return; } <rest code> B. Just assert(ModuleCG && "Module CG not present"); < rest code> C. if (!ModuleCG) { llvm_unreachable("ModuleCG"); return; } <rest code> Matt suggests B but in release builds the code would just crash because it would end up accessing nullptr. I had A initially because it asserts as well as returns gracefully so it does not crash. C is similar to A just llvm_unreachable replaces assert. I prefer but C because it follows coding standard and also returns gracefully returns in release builds. Which one we agree the best here? madhur13490: There are ways to implement this: A. ``` if (!ModuleCG) { assert(false, "ModuleCG not…
		madhur13490AuthorUnsubmitted Done Reply Inline Actions Typo: I prefer C madhur13490: Typo: I prefer C
		arsenmUnsubmitted Not Done Reply Inline Actions C doesn't follow the coding standard and adds dead code. You also should NOT be attempting to gracefully handle invalid cases in a release build. The optimizer will happily delete this null check anyway arsenm: C doesn't follow the coding standard and adds dead code. You also should NOT be attempting to…
		for (Function *F : AddressTakenFunctions) {
		CallGraphNode CGN = (CG)[F];
		if (!Visited.count(CGN)) {
		Changed \|= removeAttributes(F);
		Visited.insert(CGN);
		}

		std::queue<CallGraphNode *> SubGraph;
		SubGraph.push(CGN);
		while (!SubGraph.empty()) {
		foadUnsubmitted Not Done Reply Inline Actions It's more common to use a SmallVector for this, with push_back and pop_back_val. foad: It's more common to use a SmallVector for this, with push_back and pop_back_val.
		madhur13490AuthorUnsubmitted Done Reply Inline Actions I need a queue. This is implementing BFS. madhur13490: I need a queue. This is implementing BFS.
		CallGraphNode *CGN = SubGraph.front();
		SubGraph.pop();
		if (!Visited.count(CGN)) {
		Changed \|= removeAttributes(CGN->getFunction());
		Visited.insert(CGN);
		}
		for (auto N : *CGN)
		SubGraph.push(N.second);
		}
		}
		return Changed;
		}

Function *		Function *
AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,		AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
Function *OrigF) {		Function *OrigF) {
// TODO: search for clone's clones.		// TODO: search for clone's clones.
for (Clone &C : Clones)		for (Clone &C : Clones)
if (C.OrigF == OrigF && PropsNeeded == C.Properties)		if (C.OrigF == OrigF && PropsNeeded == C.Properties)
return C.NewF;		return C.NewF;

return nullptr;		return nullptr;
}		}

bool AMDGPUPropagateAttributes::process(Module &M) {		bool AMDGPUPropagateAttributes::process(Module &M, CallGraph *CG) {
for (auto &F : M.functions())		for (auto &F : M.functions())
if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))		if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
Roots.insert(&F);		Roots.insert(&F);

		ModuleCG = CG;
return process();		return process();
}		}

bool AMDGPUPropagateAttributes::process(Function &F) {		bool AMDGPUPropagateAttributes::process(Function &F) {
Roots.insert(&F);		Roots.insert(&F);
return process();		return process();
}		}

Show All 9 Lines	bool AMDGPUPropagateAttributes::process() {
do {		do {
Roots.insert(NewRoots.begin(), NewRoots.end());		Roots.insert(NewRoots.begin(), NewRoots.end());
NewRoots.clear();		NewRoots.clear();

for (auto &F : M.functions()) {		for (auto &F : M.functions()) {
if (F.isDeclaration())		if (F.isDeclaration())
continue;		continue;

		if (F.hasAddressTaken(nullptr, true, true, true))
		AddressTakenFunctions.insert(&F);

const FnProperties CalleeProps(*TM, F);		const FnProperties CalleeProps(*TM, F);
SmallVector<std::pair<CallBase , Function >, 32> ToReplace;		SmallVector<std::pair<CallBase , Function >, 32> ToReplace;
SmallSet<CallBase *, 32> Visited;		SmallSet<CallBase *, 32> Visited;

for (User *U : F.users()) {		for (User *U : F.users()) {
Instruction *I = dyn_cast<Instruction>(U);		Instruction *I = dyn_cast<Instruction>(U);
if (!I)		if (!I)
continue;		continue;
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines	bool AMDGPUPropagateAttributes::process() {
for (Function *F : Replaced) {		for (Function *F : Replaced) {
if (F->use_empty())		if (F->use_empty())
F->eraseFromParent();		F->eraseFromParent();
}		}

Roots.clear();		Roots.clear();
Clones.clear();		Clones.clear();

		// Keep the post processing related to indirect
		// calls separate to handle them gracefully.
		// The core traversal need not be affected by this.
		if (AllowClone)
		Changed \|= handleAddressTakenFunctions(ModuleCG);

return Changed;		return Changed;
}		}

Function *		Function *
AMDGPUPropagateAttributes::cloneWithProperties(Function &F,		AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
const FnProperties &NewProps) {		const FnProperties &NewProps) {
LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');		LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');

▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines	bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
}		}

if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))		if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
return false;		return false;

return AMDGPUPropagateAttributes(TM, false).process(F);		return AMDGPUPropagateAttributes(TM, false).process(F);
}		}

		void AMDGPUPropagateAttributesLate::getAnalysisUsage(AnalysisUsage &AU) const {
		AU.addRequired<CallGraphWrapperPass>();
		}

bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {		bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
if (!TM) {		if (!TM) {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();		auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)		if (!TPC)
return false;		return false;

TM = &TPC->getTM<TargetMachine>();		TM = &TPC->getTM<TargetMachine>();
}		}
		CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
return AMDGPUPropagateAttributes(TM, true).process(M);		return AMDGPUPropagateAttributes(TM, true).process(M, &CG);
}		}

FunctionPass		FunctionPass
llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine TM) {		llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine TM) {
return new AMDGPUPropagateAttributesEarly(TM);		return new AMDGPUPropagateAttributesEarly(TM);
}		}

ModulePass		ModulePass
llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine TM) {		llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine TM) {
return new AMDGPUPropagateAttributesLate(TM);		return new AMDGPUPropagateAttributesLate(TM);
}		}

PreservedAnalyses		PreservedAnalyses
AMDGPUPropagateAttributesEarlyPass::run(Function &F,		AMDGPUPropagateAttributesEarlyPass::run(Function &F,
FunctionAnalysisManager &AM) {		FunctionAnalysisManager &AM) {
if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))		if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
return PreservedAnalyses::all();		return PreservedAnalyses::all();

return AMDGPUPropagateAttributes(&TM, false).process(F)		return AMDGPUPropagateAttributes(&TM, false).process(F)
? PreservedAnalyses::none()		? PreservedAnalyses::none()
: PreservedAnalyses::all();		: PreservedAnalyses::all();
}		}

PreservedAnalyses		PreservedAnalyses
AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {		AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &MAM) {
return AMDGPUPropagateAttributes(&TM, true).process(M)		AMDGPUPropagateAttributes APA(&TM, true);
? PreservedAnalyses::none()		CallGraph &CG = MAM.getResult<CallGraphAnalysis>(M);
: PreservedAnalyses::all();		const bool Changed = APA.process(M, &CG);
		return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}		}

llvm/test/CodeGen/AMDGPU/opt-pipeline.ll

	Show First 20 Lines • Show All 59 Lines • ▼ Show 20 Lines
	; GCN-O1-NEXT: ModulePass Manager			; GCN-O1-NEXT: ModulePass Manager
	; GCN-O1-NEXT: Annotation2Metadata			; GCN-O1-NEXT: Annotation2Metadata
	; GCN-O1-NEXT: Force set function attributes			; GCN-O1-NEXT: Force set function attributes
	; GCN-O1-NEXT: Infer set function attributes			; GCN-O1-NEXT: Infer set function attributes
	; GCN-O1-NEXT: Unify multiple OpenCL metadata due to linking			; GCN-O1-NEXT: Unify multiple OpenCL metadata due to linking
	; GCN-O1-NEXT: AMDGPU Printf lowering			; GCN-O1-NEXT: AMDGPU Printf lowering
	; GCN-O1-NEXT: FunctionPass Manager			; GCN-O1-NEXT: FunctionPass Manager
	; GCN-O1-NEXT: Dominator Tree Construction			; GCN-O1-NEXT: Dominator Tree Construction
				; GCN-O1-NEXT: CallGraph Construction
	; GCN-O1-NEXT: Late propagate attributes from kernels to functions			; GCN-O1-NEXT: Late propagate attributes from kernels to functions
	; GCN-O1-NEXT: Interprocedural Sparse Conditional Constant Propagation			; GCN-O1-NEXT: Interprocedural Sparse Conditional Constant Propagation
	; GCN-O1-NEXT: FunctionPass Manager			; GCN-O1-NEXT: FunctionPass Manager
	; GCN-O1-NEXT: Dominator Tree Construction			; GCN-O1-NEXT: Dominator Tree Construction
	; GCN-O1-NEXT: Called Value Propagation			; GCN-O1-NEXT: Called Value Propagation
	; GCN-O1-NEXT: Global Variable Optimizer			; GCN-O1-NEXT: Global Variable Optimizer
	; GCN-O1-NEXT: FunctionPass Manager			; GCN-O1-NEXT: FunctionPass Manager
	; GCN-O1-NEXT: Dominator Tree Construction			; GCN-O1-NEXT: Dominator Tree Construction
	▲ Show 20 Lines • Show All 293 Lines • ▼ Show 20 Lines
	; GCN-O2-NEXT: ModulePass Manager			; GCN-O2-NEXT: ModulePass Manager
	; GCN-O2-NEXT: Annotation2Metadata			; GCN-O2-NEXT: Annotation2Metadata
	; GCN-O2-NEXT: Force set function attributes			; GCN-O2-NEXT: Force set function attributes
	; GCN-O2-NEXT: Infer set function attributes			; GCN-O2-NEXT: Infer set function attributes
	; GCN-O2-NEXT: Unify multiple OpenCL metadata due to linking			; GCN-O2-NEXT: Unify multiple OpenCL metadata due to linking
	; GCN-O2-NEXT: AMDGPU Printf lowering			; GCN-O2-NEXT: AMDGPU Printf lowering
	; GCN-O2-NEXT: FunctionPass Manager			; GCN-O2-NEXT: FunctionPass Manager
	; GCN-O2-NEXT: Dominator Tree Construction			; GCN-O2-NEXT: Dominator Tree Construction
				; GCN-O2-NEXT: CallGraph Construction
	; GCN-O2-NEXT: Late propagate attributes from kernels to functions			; GCN-O2-NEXT: Late propagate attributes from kernels to functions
	; GCN-O2-NEXT: Interprocedural Sparse Conditional Constant Propagation			; GCN-O2-NEXT: Interprocedural Sparse Conditional Constant Propagation
	; GCN-O2-NEXT: FunctionPass Manager			; GCN-O2-NEXT: FunctionPass Manager
	; GCN-O2-NEXT: Dominator Tree Construction			; GCN-O2-NEXT: Dominator Tree Construction
	; GCN-O2-NEXT: Called Value Propagation			; GCN-O2-NEXT: Called Value Propagation
	; GCN-O2-NEXT: Global Variable Optimizer			; GCN-O2-NEXT: Global Variable Optimizer
	; GCN-O2-NEXT: FunctionPass Manager			; GCN-O2-NEXT: FunctionPass Manager
	; GCN-O2-NEXT: Dominator Tree Construction			; GCN-O2-NEXT: Dominator Tree Construction
	▲ Show 20 Lines • Show All 338 Lines • ▼ Show 20 Lines
	; GCN-O3-NEXT: ModulePass Manager			; GCN-O3-NEXT: ModulePass Manager
	; GCN-O3-NEXT: Annotation2Metadata			; GCN-O3-NEXT: Annotation2Metadata
	; GCN-O3-NEXT: Force set function attributes			; GCN-O3-NEXT: Force set function attributes
	; GCN-O3-NEXT: Infer set function attributes			; GCN-O3-NEXT: Infer set function attributes
	; GCN-O3-NEXT: Unify multiple OpenCL metadata due to linking			; GCN-O3-NEXT: Unify multiple OpenCL metadata due to linking
	; GCN-O3-NEXT: AMDGPU Printf lowering			; GCN-O3-NEXT: AMDGPU Printf lowering
	; GCN-O3-NEXT: FunctionPass Manager			; GCN-O3-NEXT: FunctionPass Manager
	; GCN-O3-NEXT: Dominator Tree Construction			; GCN-O3-NEXT: Dominator Tree Construction
				; GCN-O3-NEXT: CallGraph Construction
	; GCN-O3-NEXT: Late propagate attributes from kernels to functions			; GCN-O3-NEXT: Late propagate attributes from kernels to functions
	; GCN-O3-NEXT: FunctionPass Manager			; GCN-O3-NEXT: FunctionPass Manager
	; GCN-O3-NEXT: Dominator Tree Construction			; GCN-O3-NEXT: Dominator Tree Construction
	; GCN-O3-NEXT: Call-site splitting			; GCN-O3-NEXT: Call-site splitting
	; GCN-O3-NEXT: Interprocedural Sparse Conditional Constant Propagation			; GCN-O3-NEXT: Interprocedural Sparse Conditional Constant Propagation
	; GCN-O3-NEXT: FunctionPass Manager			; GCN-O3-NEXT: FunctionPass Manager
	; GCN-O3-NEXT: Dominator Tree Construction			; GCN-O3-NEXT: Dominator Tree Construction
	; GCN-O3-NEXT: Called Value Propagation			; GCN-O3-NEXT: Called Value Propagation
	▲ Show 20 Lines • Show All 314 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll

This file was added.

				; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s \| FileCheck %s
				JonChesterfieldUnsubmitted Not Done Reply Inline Actions I can't tell what this test is looking for. What do you mean by common callees here? JonChesterfield: I can't tell what this test is looking for. What do you mean by common callees here?

				; CHECK-LABEL: define float @common() {
				define float @common() {
				ret float 0.0
				}

				; CHECK-LABEL: define float @foo() {
				define float @foo() {
				%direct_call = call contract float @common()
				ret float %direct_call
				}

				; CHECK-LABEL: define float @bar() {
				define float @bar() {
				ret float 0.0
				}

				; CHECK-LABEL: define float @baz() {
				define float @baz() {
				ret float 0.0
				}

				define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 {
				%fn = alloca float ()*
				switch i32 %type, label %sw.default [
				i32 1, label %sw.bb
				i32 2, label %sw.bb2
				i32 3, label %sw.bb3
				]

				sw.bb:
				store float ()* @foo, float ()** %fn
				br label %sw.epilog

				sw.bb2:
				store float ()* @bar, float ()** %fn
				br label %sw.epilog

				sw.bb3:
				store float ()* @baz, float ()** %fn
				br label %sw.epilog

				sw.default:
				br label %sw.epilog

				sw.epilog:
				%fp = load float (), float ()* %fn
				%direct_call = call contract float @common()
				%indirect_call = call contract float %fp()
				store float %indirect_call, float* %result
				ret void
				}

				attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }

llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll

This file was added.

				; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s \| FileCheck %s

				; Test to check if we skip propagating attributes even if
				; a function is called directly as well as
				; indirectly. "baz" is called directly as well as indirectly.

				; CHECK-LABEL: define float @foo()
				define float @foo() #1 {
				ret float 0.0
				}

				; CHECK-LABEL: define float @bar()
				define float @bar() #1 {
				ret float 0.0
				}

				; CHECK-LABEL: define float @baz()
				define float @baz() #1 {
				ret float 0.0
				}

				define amdgpu_kernel void @switch_indirect_kernel(float *%result, i32 %type) #1 {
				%fn = alloca float ()*
				switch i32 %type, label %sw.default [
				i32 1, label %sw.bb
				i32 2, label %sw.bb2
				i32 3, label %sw.bb3
				]

				sw.bb:
				store float ()* @foo, float ()** %fn
				br label %sw.epilog

				sw.bb2:
				store float ()* @bar, float ()** %fn
				br label %sw.epilog

				sw.bb3:
				store float ()* @baz, float ()** %fn
				br label %sw.epilog

				sw.default:
				br label %sw.epilog

				sw.epilog:
				%fp = load float (), float ()* %fn
				%direct_call = call contract float @baz()
				%indirect_call = call contract float %fp()
				store float %indirect_call, float* %result
				ret void
				}

				attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }

llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll

	Show All 24 Lines

	define private void @f(void ()* nocapture %0) #0 {			define private void @f(void ()* nocapture %0) #0 {
	ret void			ret void
	}			}

	; In order to expose this bug, it is necessary that `g` have one of the			; In order to expose this bug, it is necessary that `g` have one of the
	; propagated attributes, so that a clone and substitution would take place if g			; propagated attributes, so that a clone and substitution would take place if g
	; were actually the function being called.			; were actually the function being called.
	; CHECK-DAG: define private void @g.1() #1			; CHECK-DAG: define private void @g.1() #0
	; CHECK-DAG: define internal void @g() #2			; CHECK-DAG: define internal void @g() #1
	define private void @g() #1 {			define private void @g() #1 {
	ret void			ret void
	}			}

	attributes #0 = { noinline }			attributes #0 = { noinline }
	attributes #1 = { noinline "amdgpu-waves-per-eu"="1,10" }			attributes #1 = { noinline "amdgpu-waves-per-eu"="1,10" }

				; CHECK: attributes #0 = { noinline }
				; CHECK-NEXT: attributes #1 = { noinline "target-features"="+enable-ds128,+enable-prt-strict-null,+flat-address-space,+flat-for-global,+load-store-opt,+promote-alloca,+trap-handler,+unaligned-access-mode,-wavefrontsize16,-wavefrontsize32,+wavefrontsize64" }

llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll

This file was added.

				; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s \| FileCheck %s

				; Test to check if we skip attributes on address
				; taken functions and their callees.

				; CHECK-LABEL: define float @bar() {
				define float @bar() {
				ret float 0.0
				}

				; CHECK-LABEL: define float @baz() {
				define float @baz() {
				ret float 0.0
				}

				; CHECK-LABEL: define float @foo() {
				define float @foo() {
				%v1 = call contract float @bar()
				%v2 = call contract float @baz()
				%v3 = fadd float %v1, %v2
				ret float %v3
				}

				; CHECK-LABEL: define amdgpu_kernel void @kernel(float* %result, i32 %type) #0 {
				define amdgpu_kernel void @kernel(float *%result, i32 %type) #1 {
				%fn = alloca float ()*
				store float ()* @foo, float ()** %fn
				%fp = load float (), float ()* %fn
				%indirect_call = call contract float %fp()
				store float %indirect_call, float* %result
				ret void
				}

				attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }

llvm/test/CodeGen/AMDGPU/propagate-attributes-launch-bounds.ll

This file was added.

				; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s \| FileCheck %s

				; Test attributes on a function which
				; is called indirectly from two kernels
				; having different launch bounds.

				; This function should not have any attributes on it.
				; CHECK-LABEL: define float @foo() {
				arsenmUnsubmitted Done Reply Inline Actions Add a comment that there should be no attribute set arsenm: Add a comment that there should be no attribute set
				define float @foo() {
				ret float 0.0
				}

				define amdgpu_kernel void @kernel1(float *%result, i32 %type) #1 {
				%fn = alloca float ()*
				store float ()* @foo, float ()** %fn
				%fp = load float (), float ()* %fn
				%indirect_call = call contract float %fp()
				store float %indirect_call, float* %result
				ret void
				}

				define amdgpu_kernel void @kernel2(float *%result, i32 %type) #2 {
				%fn = alloca float ()*
				store float ()* @foo, float ()** %fn
				%fp = load float (), float ()* %fn
				%indirect_call = call contract float %fp()
				store float %indirect_call, float* %result
				ret void
				}

				attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }
				attributes #2 = { "amdgpu-flat-work-group-size"="1,512" }

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] [IndirectCalls] Don't propagate attributes to address taken functions and their callees
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 349773

llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp

llvm/test/CodeGen/AMDGPU/opt-pipeline.ll

llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll

llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll

llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll

llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll

llvm/test/CodeGen/AMDGPU/propagate-attributes-launch-bounds.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] [IndirectCalls] Don't propagate attributes to address taken functions and their calleesClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 349773

llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp

llvm/test/CodeGen/AMDGPU/opt-pipeline.ll

llvm/test/CodeGen/AMDGPU/propagate-attributes-common-callees.ll

llvm/test/CodeGen/AMDGPU/propagate-attributes-direct-indirect-common-callee.ll

llvm/test/CodeGen/AMDGPU/propagate-attributes-function-pointer-argument.ll

llvm/test/CodeGen/AMDGPU/propagate-attributes-indirect.ll

llvm/test/CodeGen/AMDGPU/propagate-attributes-launch-bounds.ll

[AMDGPU] [IndirectCalls] Don't propagate attributes to address taken functions and their callees
ClosedPublic