Diff 48502

lib/Target/AMDGPU/AMDGPU.h

	Show First 20 Lines • Show All 77 Lines • ▼ Show 20 Lines
	void initializeAMDGPUPromoteAllocaPass(PassRegistry&);			void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
	extern char &AMDGPUPromoteAllocaID;			extern char &AMDGPUPromoteAllocaID;

	Pass *createAMDGPUStructurizeCFGPass();			Pass *createAMDGPUStructurizeCFGPass();
	FunctionPass *createAMDGPUISelDag(TargetMachine &tm);			FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
	ModulePass *createAMDGPUAlwaysInlinePass();			ModulePass *createAMDGPUAlwaysInlinePass();
	ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();			ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
	FunctionPass *createAMDGPUAnnotateUniformValues();			FunctionPass *createAMDGPUAnnotateUniformValues();
				FunctionPass *createAMDGPUInsertDebugNopsPass();
				FunctionPass *createAMDGPULowerDebugNopsPass();

	void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);			void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
	extern char &SIFixControlFlowLiveIntervalsID;			extern char &SIFixControlFlowLiveIntervalsID;

	void initializeSIFixSGPRLiveRangesPass(PassRegistry&);			void initializeSIFixSGPRLiveRangesPass(PassRegistry&);
	extern char &SIFixSGPRLiveRangesID;			extern char &SIFixSGPRLiveRangesID;

	void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);			void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
	extern char &AMDGPUAnnotateUniformValuesPassID;			extern char &AMDGPUAnnotateUniformValuesPassID;

				void initializeAMDGPUInsertDebugNopsPass(PassRegistry&);
				extern char &AMDGPUInsertDebugNopsID;

				void initializeAMDGPULowerDebugNopsPass(PassRegistry&);
				extern char &AMDGPULowerDebugNopsID;

	void initializeSIAnnotateControlFlowPass(PassRegistry&);			void initializeSIAnnotateControlFlowPass(PassRegistry&);
	extern char &SIAnnotateControlFlowPassID;			extern char &SIAnnotateControlFlowPassID;

	void initializeSIInsertWaitsPass(PassRegistry&);			void initializeSIInsertWaitsPass(PassRegistry&);
	extern char &SIInsertWaitsID;			extern char &SIInsertWaitsID;

	extern Target TheAMDGPUTarget;			extern Target TheAMDGPUTarget;
	extern Target TheGCNTarget;			extern Target TheGCNTarget;
	▲ Show 20 Lines • Show All 70 Lines • Show Last 20 Lines

lib/Target/AMDGPU/AMDGPUInsertNopsPass.cpp

This file was added.

				//===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===//
				tstellarAMDUnsubmitted Done Reply Inline Actions The file name in the header is wrong, but I think the file and the class names should be renamed to use the 'SI' prefix rather than the 'AMDGPU' prefix to be consistent with other GCN only passes. tstellarAMD: The file name in the header is wrong, but I think the file and the class names should be…
				kzhuravlAuthorUnsubmitted Done Reply Inline Actions ok kzhuravl: ok
				//
				// The LLVM Compiler Infrastructure
				//
				// This file is distributed under the University of Illinois Open Source
				// License. See LICENSE.TXT for details.
				//
				//===----------------------------------------------------------------------===//
				//
				/// \file
				/// These passes insert an S_NOP instruction for each high level source
				/// statement. The AMDGPUInsertDebugNops pass inserts DEBUG_NOP pseudo
				/// instructions before register allocation. The AMDGPULowerDebugNops pass
				/// lowers DEBUG_NOP instructions to S_NOP instructions before machine code is
				/// emitted.
				///
				/// S_NOP for each high level source statement is needed for tools (i.e.
				/// debugger, profiler), which overwrite S_NOPs with S_TRAPs as they see fit.
				//
				//===----------------------------------------------------------------------===//
				tstellarAMDUnsubmitted Done Reply Inline Actions Why do we need two passes. Can't we just insert the S_NOP instructions in the first pass? tstellarAMD: Why do we need two passes. Can't we just insert the S_NOP instructions in the first pass?
				kzhuravlAuthorUnsubmitted Done Reply Inline Actions this should work with different optimization levels. for o0 one pass works fine. in other opt levels instructions are reordered at different compilation stages. first pass inserts DEBUG_NOP pseudo instructions before register allocation. DEBUG_NOP pseudo instruction has isTerminator attribute, which makes reordering across DEBUG_NOPs not possible. second pass lowers DEBUG_NOPs to S_NOPs right before machine code is emitted. kzhuravl: this should work with different optimization levels. for o0 one pass works fine. in other opt…
				tstellarAMDUnsubmitted Done Reply Inline Actions By the time we get to running the first pass, the code will have already been re-ordered by the LLVM IR passes as well as the SelectionDAG. We also can't insert instructions with terminators in the middle of blocks, because this will break other passes (and the verifier). Can we start with one pass and if the result isn't good enough then maybe look for other solutions? tstellarAMD: By the time we get to running the first pass, the code will have already been re-ordered by the…
				kzhuravlAuthorUnsubmitted Not Done Reply Inline Actions After discussion with Tools, it was decided to insert two S_NOPs for each high level source statement, this way we do not have to disable any optimizations in non-O0 opt levels. One S_NOP is inserted before first isa instruction of high level source stmt and after last isa instruction of high level source stmt. Updated the diff which includes one pass kzhuravl: After discussion with Tools, it was decided to insert two S_NOPs for each high level source…
				#include "AMDGPU.h"
				#include "AMDGPUInstrInfo.h"
				#include "AMDGPUSubtarget.h"
				#include "llvm/CodeGen/MachineFunction.h"
				#include "llvm/CodeGen/MachineFunctionPass.h"
				#include "llvm/CodeGen/MachineInstrBuilder.h"
				using namespace llvm;

				//===----------------------------------------------------------------------===//
				// AMDGPU Insert Debug Nops Pass
				//===----------------------------------------------------------------------===//

				#define DEBUG_TYPE "amdgpu-insert-debug-nops"
				#define PASS_NAME "AMDGPU Insert Debug Nops"

				namespace {

				/// Machine function pass that places a DEBUG_NOP pseudo instruction in front
				/// of instructions whose source line differs from the previously seen one.
				class AMDGPUInsertDebugNops : public MachineFunctionPass {
				public:
				  /// Pass identification, handed to the MachineFunctionPass constructor.
				  static char ID;

				  AMDGPUInsertDebugNops() : MachineFunctionPass(ID) {}

				  /// Inserts the DEBUG_NOP markers into \p MF; defined out of line.
				  bool runOnMachineFunction(MachineFunction &MF) override;

				  /// Human readable name of this pass.
				  const char *getPassName() const override {
				    return PASS_NAME;
				  }
				};

				} // anonymous namespace

				// Register the pass. No pass dependencies are declared for it, so the
				// single-macro form expands to the same code as an empty BEGIN/END pair.
				INITIALIZE_PASS(AMDGPUInsertDebugNops, DEBUG_TYPE, PASS_NAME, false, false)

				// Storage for the pass ID and the reference to it declared in AMDGPU.h.
				char AMDGPUInsertDebugNops::ID = 0;
				char &llvm::AMDGPUInsertDebugNopsID = AMDGPUInsertDebugNops::ID;

				/// Factory for the DEBUG_NOP insertion pass.
				/// \returns a newly heap-allocated AMDGPUInsertDebugNops instance.
				FunctionPass *llvm::createAMDGPUInsertDebugNopsPass() {
				  FunctionPass *InsertPass = new AMDGPUInsertDebugNops();
				  return InsertPass;
				}

				/// Visit every instruction of \p MF and, whenever an instruction that is not
				/// a debug value carries a debug location whose line number differs from the
				/// last line recorded, insert a DEBUG_NOP in front of it.
				///
				/// The last recorded line starts at 0 and is not reset between basic blocks.
				/// \returns true if at least one DEBUG_NOP was inserted.
				bool AMDGPUInsertDebugNops::runOnMachineFunction(MachineFunction &MF) {
				  const auto *TII =
				      static_cast<const AMDGPUInstrInfo*>(MF.getSubtarget().getInstrInfo());
				  const MCInstrDesc &NopDesc = TII->get(AMDGPU::DEBUG_NOP);

				  bool Changed = false;
				  unsigned LastLine = 0;
				  for (auto &MBB : MF) {
				    for (auto &MI : MBB) {
				      const DebugLoc &DL = MI.getDebugLoc();

				      // Nothing to do for debug values, for instructions without a debug
				      // location, or for instructions still on the last recorded line.
				      if (MI.isDebugValue() || !DL || DL.getLine() == LastLine)
				        continue;

				      // The DEBUG_NOP reuses the debug location of the instruction it
				      // precedes.
				      BuildMI(MBB, MI, DL, NopDesc);
				      LastLine = DL.getLine();
				      Changed = true;
				    }
				  }

				  return Changed;
				}

				#undef DEBUG_TYPE
				#undef PASS_NAME

				//===----------------------------------------------------------------------===//
				// AMDGPU Lower Debug Nops Pass
				//===----------------------------------------------------------------------===//

				#define DEBUG_TYPE "amdgpu-lower-debug-nops"
				#define PASS_NAME "AMDGPU Lower Debug Nops"

				namespace {

				/// Machine function pass that turns the DEBUG_NOP pseudo instructions of a
				/// function into S_NOP instructions.
				class AMDGPULowerDebugNops : public MachineFunctionPass {
				public:
				  /// Pass identification, handed to the MachineFunctionPass constructor.
				  static char ID;

				  AMDGPULowerDebugNops() : MachineFunctionPass(ID) {}

				  /// Performs the lowering on \p MF; defined out of line.
				  bool runOnMachineFunction(MachineFunction &MF) override;

				  /// Human readable name of this pass.
				  const char *getPassName() const override {
				    return PASS_NAME;
				  }
				};

				} // anonymous namespace

				// Register the pass. No pass dependencies are declared for it, so the
				// single-macro form expands to the same code as an empty BEGIN/END pair.
				INITIALIZE_PASS(AMDGPULowerDebugNops, DEBUG_TYPE, PASS_NAME, false, false)

				// Storage for the pass ID and the reference to it declared in AMDGPU.h.
				char AMDGPULowerDebugNops::ID = 0;
				char &llvm::AMDGPULowerDebugNopsID = AMDGPULowerDebugNops::ID;

				/// Factory for the DEBUG_NOP lowering pass.
				/// \returns a newly heap-allocated AMDGPULowerDebugNops instance.
				FunctionPass *llvm::createAMDGPULowerDebugNopsPass() {
				  FunctionPass *LowerPass = new AMDGPULowerDebugNops();
				  return LowerPass;
				}

				/// Replace every instruction of \p MF whose TSFlags carry
				/// AMDGPU_FLAG_DEBUG_NOP with an "S_NOP 0" that keeps the original debug
				/// location. If anything was replaced, one more "S_NOP 0" without a debug
				/// location is placed at the very start of the first basic block.
				///
				/// \returns true if at least one instruction was replaced.
				bool AMDGPULowerDebugNops::runOnMachineFunction(MachineFunction &MF) {
				  const AMDGPUInstrInfo *TII =
				      static_cast<const AMDGPUInstrInfo*>(MF.getSubtarget().getInstrInfo());

				  bool modified = false;
				  for (auto &CMB : MF) {
				tstellarAMDUnsubmitted Done Reply Inline Actions Style comment. We usually use MBB as a variable name when iterating over blocks and MI when iterating over Machine Instructions. This will make it more obvious what the code is doing. tstellarAMD: Style comment. We usually use MBB as a variable name when iterating over blocks and MI when…
				kzhuravlAuthorUnsubmitted Done Reply Inline Actions ok kzhuravl: ok
				    auto CMI = CMB.begin();
				    while (CMI != CMB.end()) {
				      if (TII->get(CMI->getOpcode()).TSFlags & AMDGPU_FLAG_DEBUG_NOP) {
				        // Emit the real nop in front of the pseudo, then drop the pseudo.
				        // erase() hands back the iterator to the following instruction, so
				        // the walk continues without an explicit increment.
				        BuildMI(CMB, *CMI, CMI->getDebugLoc(), TII->get(AMDGPU::S_NOP))
				            .addImm(0);
				        CMI = CMB.erase(CMI);
				        modified = true;
				      } else {
				        ++CMI;
				      }
				    }
				  }

				  if (modified) {
				    // Insert at begin() rather than in front of front(): the entry block
				    // may be empty even though a later block was modified, and front() on
				    // an empty block is invalid. begin() equals end() in that case, which
				    // is a valid insertion point.
				    // NOTE(review): the purpose of this extra leading nop is not stated in
				    // this file -- confirm against the tools requirements.
				    MachineBasicBlock &EntryMBB = MF.front();
				    BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII->get(AMDGPU::S_NOP))
				        .addImm(0);
				  }

				  return modified;
				}

				#undef DEBUG_TYPE
				#undef PASS_NAME

lib/Target/AMDGPU/AMDGPUInstrInfo.h

Show First 20 Lines • Show All 88 Lines • ▼ Show 20 Lines	namespace AMDGPU {
LLVM_READONLY		LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);		int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
} // End namespace AMDGPU		} // End namespace AMDGPU

} // End llvm namespace		} // End llvm namespace

#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)		#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)		#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
		#define AMDGPU_FLAG_DEBUG_NOP (UINT64_C(1) << 61)

#endif		#endif

lib/Target/AMDGPU/AMDGPUInstructions.td

Show All 9 Lines
// This file contains instruction defs that are common to all hw codegen		// This file contains instruction defs that are common to all hw codegen
// targets.		// targets.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {		class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
field bit isRegisterLoad = 0;		field bit isRegisterLoad = 0;
field bit isRegisterStore = 0;		field bit isRegisterStore = 0;
		field bit isDebugNop = 0;

let Namespace = "AMDGPU";		let Namespace = "AMDGPU";
let OutOperandList = outs;		let OutOperandList = outs;
let InOperandList = ins;		let InOperandList = ins;
let AsmString = asm;		let AsmString = asm;
let Pattern = pattern;		let Pattern = pattern;
let Itinerary = NullALU;		let Itinerary = NullALU;

let TSFlags{63} = isRegisterLoad;		let TSFlags{63} = isRegisterLoad;
let TSFlags{62} = isRegisterStore;		let TSFlags{62} = isRegisterStore;
		let TSFlags{61} = isDebugNop;
}		}

class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>		class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
: AMDGPUInst<outs, ins, asm, pattern> {		: AMDGPUInst<outs, ins, asm, pattern> {

field bits<32> Inst = 0xffffffff;		field bits<32> Inst = 0xffffffff;

}		}
▲ Show 20 Lines • Show All 420 Lines • ▼ Show 20 Lines	def RegisterStore : AMDGPUShaderInst <
"RegisterStore $val, $addr",		"RegisterStore $val, $addr",
[(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]		[(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
> {		> {
let isRegisterStore = 1;		let isRegisterStore = 1;
}		}
}		}
}		}

		def DEBUG_NOP : AMDGPUShaderInst<(outs), (ins), "DEBUG_NOP", []> {
		let isDebugNop = 1;
		let isTerminator = 1;
		}

} // End isCodeGenOnly = 1, isPseudo = 1		} // End isCodeGenOnly = 1, isPseudo = 1

/* Generic helper patterns for intrinsics */		/* Generic helper patterns for intrinsics */
/* -------------------------------------- */		/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>		class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
: Pat <		: Pat <
(fpow f32:$src0, f32:$src1),		(fpow f32:$src0, f32:$src1),
▲ Show 20 Lines • Show All 145 Lines • Show Last 20 Lines

lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Show All 24 Lines
#include "llvm/Analysis/Passes.h"		#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"		#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"		#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/MachineModuleInfo.h"		#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"		#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Verifier.h"		#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"		#include "llvm/MC/MCAsmInfo.h"
#include "llvm/IR/LegacyPassManager.h"		#include "llvm/IR/LegacyPassManager.h"
		#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"		#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_os_ostream.h"		#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Transforms/IPO.h"		#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"		#include "llvm/Transforms/Scalar.h"
#include <llvm/CodeGen/Passes.h>		#include <llvm/CodeGen/Passes.h>

using namespace llvm;		using namespace llvm;

extern "C" void LLVMInitializeAMDGPUTarget() {		extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target		// Register the target
RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);		RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);		RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);

PassRegistry *PR = PassRegistry::getPassRegistry();		PassRegistry *PR = PassRegistry::getPassRegistry();
initializeSILowerI1CopiesPass(*PR);		initializeSILowerI1CopiesPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);		initializeSIFixSGPRCopiesPass(*PR);
initializeSIFoldOperandsPass(*PR);		initializeSIFoldOperandsPass(*PR);
initializeSIFixSGPRLiveRangesPass(*PR);		initializeSIFixSGPRLiveRangesPass(*PR);
initializeSIFixControlFlowLiveIntervalsPass(*PR);		initializeSIFixControlFlowLiveIntervalsPass(*PR);
initializeSILoadStoreOptimizerPass(*PR);		initializeSILoadStoreOptimizerPass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);		initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);		initializeAMDGPUAnnotateUniformValuesPass(*PR);
		initializeAMDGPUInsertDebugNopsPass(*PR);
		initializeAMDGPULowerDebugNopsPass(*PR);
initializeAMDGPUPromoteAllocaPass(*PR);		initializeAMDGPUPromoteAllocaPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);		initializeSIAnnotateControlFlowPass(*PR);
initializeSIInsertWaitsPass(*PR);		initializeSIInsertWaitsPass(*PR);
initializeSILowerControlFlowPass(*PR);		initializeSILowerControlFlowPass(*PR);
}		}

static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {		static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.getOS() == Triple::AMDHSA)		if (TT.getOS() == Triple::AMDHSA)
▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines	GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
CodeModel::Model CM, CodeGenOpt::Level OL)		CodeModel::Model CM, CodeGenOpt::Level OL)
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}		: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// AMDGPU Pass Setup		// AMDGPU Pass Setup
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

namespace {		namespace {

		cl::opt<bool> InsertNops(
		"amdgpu-insert-nops",
		cl::desc("Insert nop instruction for each high level source statement"),
		cl::init(false));

class AMDGPUPassConfig : public TargetPassConfig {		class AMDGPUPassConfig : public TargetPassConfig {
public:		public:
AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)		AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {		: TargetPassConfig(TM, PM) {

// Exceptions and StackMaps are not supported, so these passes will never do		// Exceptions and StackMaps are not supported, so these passes will never do
// anything.		// anything.
disablePass(&StackMapLivenessID);		disablePass(&StackMapLivenessID);
Show All 14 Lines	createMachineScheduler(MachineSchedContext *C) const override {
return nullptr;		return nullptr;
}		}

void addIRPasses() override;		void addIRPasses() override;
void addCodeGenPrepare() override;		void addCodeGenPrepare() override;
bool addPreISel() override;		bool addPreISel() override;
bool addInstSelector() override;		bool addInstSelector() override;
bool addGCPasses() override;		bool addGCPasses() override;
		void addPreRegAlloc() override;
		void addPreEmitPass() override;
};		};

class R600PassConfig : public AMDGPUPassConfig {		class R600PassConfig : public AMDGPUPassConfig {
public:		public:
R600PassConfig(TargetMachine *TM, PassManagerBase &PM)		R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
: AMDGPUPassConfig(TM, PM) { }		: AMDGPUPassConfig(TM, PM) { }

bool addPreISel() override;		bool addPreISel() override;
▲ Show 20 Lines • Show All 66 Lines • ▼ Show 20 Lines	bool AMDGPUPassConfig::addInstSelector() {
return false;		return false;
}		}

bool AMDGPUPassConfig::addGCPasses() {		bool AMDGPUPassConfig::addGCPasses() {
// Do nothing. GC is not supported.		// Do nothing. GC is not supported.
return false;		return false;
}		}

		void AMDGPUPassConfig::addPreRegAlloc() {
		if (InsertNops) {
		tstellarAMDUnsubmitted Done Reply Inline Actions We should also be running this pass if the user specifies -g. tstellarAMD: We should also be running this pass if the user specifies -g.
		kzhuravlAuthorUnsubmitted Done Reply Inline Actions tools team specifically asked not to do this. for example, for profiling they want to run it with -g, but without inserting nops kzhuravl: tools team specifically asked not to do this. for example, for profiling they want to run it…
		tstellarAMDUnsubmitted Done Reply Inline Actions Ok, so the nops are required for both debugging and profiling? Is the command line flag you added accessible from clang? tstellarAMD: Ok, so the nops are required for both debugging and profiling? Is the command line flag you…
		kzhuravlAuthorUnsubmitted Not Done Reply Inline Actions nops are only required for debugging. for profiling they want to have debug info, but no nops. command line flag is accessible from clang kzhuravl: nops are only required for debugging. for profiling they want to have debug info, but no nops.
		addPass(createAMDGPUInsertDebugNopsPass());
		tstellarAMDUnsubmitted Done Reply Inline Actions This should be added to GCNPassConfig since it is GCN only. tstellarAMD: This should be added to GCNPassConfig since it is GCN only.
		kzhuravlAuthorUnsubmitted Done Reply Inline Actions ok kzhuravl: ok
		}
		}

		void AMDGPUPassConfig::addPreEmitPass() {
		if (InsertNops) {
		addPass(createAMDGPULowerDebugNopsPass());
		}
		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// R600 Pass Setup		// R600 Pass Setup
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

bool R600PassConfig::addPreISel() {		bool R600PassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();		AMDGPUPassConfig::addPreISel();
addPass(createR600TextureIntrinsicsReplacer());		addPass(createR600TextureIntrinsicsReplacer());
return false;		return false;
}		}

void R600PassConfig::addPreRegAlloc() {		void R600PassConfig::addPreRegAlloc() {
		AMDGPUPassConfig::addPreRegAlloc();
addPass(createR600VectorRegMerger(*TM));		addPass(createR600VectorRegMerger(*TM));
}		}

void R600PassConfig::addPreSched2() {		void R600PassConfig::addPreSched2() {
const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();		const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
addPass(createR600EmitClauseMarkers(), false);		addPass(createR600EmitClauseMarkers(), false);
if (ST.isIfCvtEnabled())		if (ST.isIfCvtEnabled())
addPass(&IfConverterID, false);		addPass(&IfConverterID, false);
addPass(createR600ClauseMergePass(*TM), false);		addPass(createR600ClauseMergePass(*TM), false);
}		}

void R600PassConfig::addPreEmitPass() {		void R600PassConfig::addPreEmitPass() {
		AMDGPUPassConfig::addPreEmitPass();
addPass(createAMDGPUCFGStructurizerPass(), false);		addPass(createAMDGPUCFGStructurizerPass(), false);
addPass(createR600ExpandSpecialInstrsPass(*TM), false);		addPass(createR600ExpandSpecialInstrsPass(*TM), false);
addPass(&FinalizeMachineBundlesID, false);		addPass(&FinalizeMachineBundlesID, false);
addPass(createR600Packetizer(*TM), false);		addPass(createR600Packetizer(*TM), false);
addPass(createR600ControlFlowFinalizer(*TM), false);		addPass(createR600ControlFlowFinalizer(*TM), false);
}		}

TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {		TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
Show All 23 Lines	bool GCNPassConfig::addInstSelector() {
AMDGPUPassConfig::addInstSelector();		AMDGPUPassConfig::addInstSelector();
addPass(createSILowerI1CopiesPass());		addPass(createSILowerI1CopiesPass());
addPass(&SIFixSGPRCopiesID);		addPass(&SIFixSGPRCopiesID);
addPass(createSIFoldOperandsPass());		addPass(createSIFoldOperandsPass());
return false;		return false;
}		}

void GCNPassConfig::addPreRegAlloc() {		void GCNPassConfig::addPreRegAlloc() {
		AMDGPUPassConfig::addPreRegAlloc();

const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();		const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();

// This needs to be run directly before register allocation because		// This needs to be run directly before register allocation because
// earlier passes might recompute live intervals.		// earlier passes might recompute live intervals.
// TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass		// TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
if (getOptLevel() > CodeGenOpt::None) {		if (getOptLevel() > CodeGenOpt::None) {
insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);		insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
}		}
Show All 27 Lines
void GCNPassConfig::addPostRegAlloc() {		void GCNPassConfig::addPostRegAlloc() {
addPass(createSIShrinkInstructionsPass(), false);		addPass(createSIShrinkInstructionsPass(), false);
}		}

void GCNPassConfig::addPreSched2() {		void GCNPassConfig::addPreSched2() {
}		}

void GCNPassConfig::addPreEmitPass() {		void GCNPassConfig::addPreEmitPass() {
		AMDGPUPassConfig::addPreEmitPass();

addPass(createSIInsertWaitsPass(), false);		addPass(createSIInsertWaitsPass(), false);
addPass(createSILowerControlFlowPass(), false);		addPass(createSILowerControlFlowPass(), false);
}		}

TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {		TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
return new GCNPassConfig(this, PM);		return new GCNPassConfig(this, PM);
}		}

lib/Target/AMDGPU/CMakeLists.txt

	Show All 14 Lines
	add_llvm_target(AMDGPUCodeGen			add_llvm_target(AMDGPUCodeGen
	AMDILCFGStructurizer.cpp			AMDILCFGStructurizer.cpp
	AMDGPUAlwaysInlinePass.cpp			AMDGPUAlwaysInlinePass.cpp
	AMDGPUAnnotateKernelFeatures.cpp			AMDGPUAnnotateKernelFeatures.cpp
	AMDGPUAnnotateUniformValues.cpp			AMDGPUAnnotateUniformValues.cpp
	AMDGPUAsmPrinter.cpp			AMDGPUAsmPrinter.cpp
	AMDGPUFrameLowering.cpp			AMDGPUFrameLowering.cpp
	AMDGPUTargetObjectFile.cpp			AMDGPUTargetObjectFile.cpp
				AMDGPUInsertNopsPass.cpp
	AMDGPUIntrinsicInfo.cpp			AMDGPUIntrinsicInfo.cpp
	AMDGPUISelDAGToDAG.cpp			AMDGPUISelDAGToDAG.cpp
	AMDGPUMCInstLower.cpp			AMDGPUMCInstLower.cpp
	AMDGPUMachineFunction.cpp			AMDGPUMachineFunction.cpp
	AMDGPUOpenCLImageTypeLoweringPass.cpp			AMDGPUOpenCLImageTypeLoweringPass.cpp
	AMDGPUSubtarget.cpp			AMDGPUSubtarget.cpp
	AMDGPUTargetMachine.cpp			AMDGPUTargetMachine.cpp
	AMDGPUTargetTransformInfo.cpp			AMDGPUTargetTransformInfo.cpp
	Show All 40 Lines

This is an archive of the discontinued LLVM Phabricator instance.

Insert two S_NOP instructions for every high level source statement.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 48502

lib/Target/AMDGPU/AMDGPU.h

lib/Target/AMDGPU/AMDGPUInsertNopsPass.cpp

lib/Target/AMDGPU/AMDGPUInstrInfo.h

lib/Target/AMDGPU/AMDGPUInstructions.td

lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

lib/Target/AMDGPU/CMakeLists.txt

This is an archive of the discontinued LLVM Phabricator instance.

Insert two S_NOP instructions for every high level source statement.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 48502

lib/Target/AMDGPU/AMDGPU.h

lib/Target/AMDGPU/AMDGPUInsertNopsPass.cpp

lib/Target/AMDGPU/AMDGPUInstrInfo.h

lib/Target/AMDGPU/AMDGPUInstructions.td

lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

lib/Target/AMDGPU/CMakeLists.txt

Insert two S_NOP instructions for every high level source statement.
ClosedPublic