Changeset View
Changeset View
Standalone View
Standalone View
lib/Target/NVPTX/NVPTXTargetMachine.cpp
Show First 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | |||||
// Forward declarations of the NVPTX initialize*Pass hooks. Each one takes a
// PassRegistry& and is called from LLVMInitializeNVPTXTarget() in this file.
void initializeNVVMIntrRangePass(PassRegistry&); | void initializeNVVMIntrRangePass(PassRegistry&); | ||||
void initializeNVVMReflectPass(PassRegistry&); | void initializeNVVMReflectPass(PassRegistry&); | ||||
void initializeGenericToNVVMPass(PassRegistry&); | void initializeGenericToNVVMPass(PassRegistry&); | ||||
void initializeNVPTXAllocaHoistingPass(PassRegistry &); | void initializeNVPTXAllocaHoistingPass(PassRegistry &); | ||||
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); | void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); | ||||
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); | void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); | ||||
void initializeNVPTXLowerArgsPass(PassRegistry &); | void initializeNVPTXLowerArgsPass(PassRegistry &); | ||||
void initializeNVPTXLowerAllocaPass(PassRegistry &); | void initializeNVPTXLowerAllocaPass(PassRegistry &); | ||||
void initializeNVPTXFunctionDataSharingPass(PassRegistry &); | |||||
} // end namespace llvm | } // end namespace llvm | ||||
// C-linkage entry point for NVPTX target initialization. Registers
// NVPTXTargetMachine32 and NVPTXTargetMachine64 against their respective
// targets, then calls every NVPTX initialize*Pass hook declared above on the
// registry returned by PassRegistry::getPassRegistry().
extern "C" void LLVMInitializeNVPTXTarget() { | extern "C" void LLVMInitializeNVPTXTarget() { | ||||
// Register the target. | // Register the target. | ||||
RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32()); | RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32()); | ||||
RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64()); | RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64()); | ||||
// FIXME: This pass is really intended to be invoked during IR optimization, | // FIXME: This pass is really intended to be invoked during IR optimization, | ||||
// but it's very NVPTX-specific. | // but it's very NVPTX-specific. | ||||
PassRegistry &PR = *PassRegistry::getPassRegistry(); | PassRegistry &PR = *PassRegistry::getPassRegistry(); | ||||
initializeNVVMReflectPass(PR); | initializeNVVMReflectPass(PR); | ||||
initializeNVVMIntrRangePass(PR); | initializeNVVMIntrRangePass(PR); | ||||
initializeGenericToNVVMPass(PR); | initializeGenericToNVVMPass(PR); | ||||
initializeNVPTXAllocaHoistingPass(PR); | initializeNVPTXAllocaHoistingPass(PR); | ||||
initializeNVPTXAssignValidGlobalNamesPass(PR); | initializeNVPTXAssignValidGlobalNamesPass(PR); | ||||
initializeNVPTXLowerArgsPass(PR); | initializeNVPTXLowerArgsPass(PR); | ||||
initializeNVPTXLowerAllocaPass(PR); | initializeNVPTXLowerAllocaPass(PR); | ||||
initializeNVPTXFunctionDataSharingPass(PR); | |||||
initializeNVPTXLowerAggrCopiesPass(PR); | initializeNVPTXLowerAggrCopiesPass(PR); | ||||
} | } | ||||
static std::string computeDataLayout(bool is64Bit) { | static std::string computeDataLayout(bool is64Bit) { | ||||
std::string Ret = "e"; | std::string Ret = "e"; | ||||
if (!is64Bit) | if (!is64Bit) | ||||
Ret += "-p:32:32"; | Ret += "-p:32:32"; | ||||
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines | public: | ||||
// Returns this pass config's target machine as an NVPTXTargetMachine
// reference, obtained through getTM<NVPTXTargetMachine>().
NVPTXTargetMachine &getNVPTXTargetMachine() const { | NVPTXTargetMachine &getNVPTXTargetMachine() const { | ||||
return getTM<NVPTXTargetMachine>(); | return getTM<NVPTXTargetMachine>(); | ||||
} | } | ||||
// Pass-pipeline hooks. Each is declared `override`, so it replaces a virtual
// of the base pass-config class (the base class is not visible in this
// excerpt).
void addIRPasses() override; | void addIRPasses() override; | ||||
bool addInstSelector() override; | bool addInstSelector() override; | ||||
void addPostRegAlloc() override; | void addPostRegAlloc() override; | ||||
void addMachineSSAOptimization() override; | void addMachineSSAOptimization() override; | ||||
// Adds the shared-frame-index lowering pass; called from
// addMachineSSAOptimization().
void addMachineSSALowering() override; | |||||
FunctionPass *createTargetRegisterAllocator(bool) override; | FunctionPass *createTargetRegisterAllocator(bool) override; | ||||
void addFastRegAlloc(FunctionPass *RegAllocPass) override; | void addFastRegAlloc(FunctionPass *RegAllocPass) override; | ||||
void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; | void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; | ||||
private: | private: | ||||
// If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This | // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This | ||||
// function is only called in opt mode. | // function is only called in opt mode. | ||||
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines | if (getOptLevel() != CodeGenOpt::None) | ||||
addPass(createNVPTXImageOptimizerPass()); | addPass(createNVPTXImageOptimizerPass()); | ||||
addPass(createNVPTXAssignValidGlobalNamesPass()); | addPass(createNVPTXAssignValidGlobalNamesPass()); | ||||
addPass(createGenericToNVVMPass()); | addPass(createGenericToNVVMPass()); | ||||
// NVPTXLowerArgs is required for correctness and should be run right | // NVPTXLowerArgs is required for correctness and should be run right | ||||
// before the address space inference passes. | // before the address space inference passes. | ||||
addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine())); | addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine())); | ||||
if (getOptLevel() != CodeGenOpt::None) { | if (getOptLevel() != CodeGenOpt::None) { | ||||
// Add address space inference passes | |||||
addAddressSpaceInferencePasses(); | addAddressSpaceInferencePasses(); | ||||
if (!DisableLoadStoreVectorizer) | if (!DisableLoadStoreVectorizer) | ||||
addPass(createLoadStoreVectorizerPass()); | addPass(createLoadStoreVectorizerPass()); | ||||
addStraightLineScalarOptimizationPasses(); | addStraightLineScalarOptimizationPasses(); | ||||
} else { | |||||
// When the shared depot is generated, even when no optimizations are | |||||
// used, we need to lower certain alloca instructions to the appropriate | |||||
// memory type for correctness. | |||||
addPass(createNVPTXFunctionDataSharingPass(&getNVPTXTargetMachine())); | |||||
} | } | ||||
// === LSR and other generic IR passes === | // === LSR and other generic IR passes === | ||||
TargetPassConfig::addIRPasses(); | TargetPassConfig::addIRPasses(); | ||||
// EarlyCSE is not always strong enough to clean up what LSR produces. For | // EarlyCSE is not always strong enough to clean up what LSR produces. For | ||||
// example, GVN can combine | // example, GVN can combine | ||||
// | // | ||||
// %0 = add %a, %b | // %0 = add %a, %b | ||||
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines | void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { | ||||
addPass(&StackSlotColoringID); | addPass(&StackSlotColoringID); | ||||
// FIXME: Needs physical registers | // FIXME: Needs physical registers | ||||
//addPass(&PostRAMachineLICMID); | //addPass(&PostRAMachineLICMID); | ||||
printAndVerify("After StackSlotColoring"); | printAndVerify("After StackSlotColoring"); | ||||
} | } | ||||
// Adds the pass returned by createNVPTXLowerSharedFrameIndicesPass() to the
// pipeline. addMachineSSAOptimization() calls this right after adding
// OptimizePHIs and before adding StackColoring.
void NVPTXPassConfig::addMachineSSALowering() { | |||||
// Lower shared frame indices. | |||||
// NOTE(review): the second addPass() argument is false; presumably this is
// the verify-after flag, so no machine verification runs after this pass --
// confirm against TargetPassConfig::addPass.
addPass(createNVPTXLowerSharedFrameIndicesPass(), false); | |||||
} | |||||
void NVPTXPassConfig::addMachineSSAOptimization() { | void NVPTXPassConfig::addMachineSSAOptimization() { | ||||
// Pre-ra tail duplication. | // Pre-ra tail duplication. | ||||
if (addPass(&EarlyTailDuplicateID)) | if (addPass(&EarlyTailDuplicateID)) | ||||
printAndVerify("After Pre-RegAlloc TailDuplicate"); | printAndVerify("After Pre-RegAlloc TailDuplicate"); | ||||
// Optimize PHIs before DCE: removing dead PHI cycles may make more | // Optimize PHIs before DCE: removing dead PHI cycles may make more | ||||
// instructions dead. | // instructions dead. | ||||
addPass(&OptimizePHIsID); | addPass(&OptimizePHIsID); | ||||
// To keep the SSA optimizations on local frame indices from treating shared | |||||
// and local frame indices the same, lower the shared frame indices before | |||||
// those optimizations are applied. | |||||
addMachineSSALowering(); | |||||
// This pass merges large allocas. StackSlotColoring is a different pass | // This pass merges large allocas. StackSlotColoring is a different pass | ||||
// which merges spill slots. | // which merges spill slots. | ||||
addPass(&StackColoringID); | addPass(&StackColoringID); | ||||
// If the target requests it, assign local variables to stack slots relative | // If the target requests it, assign local variables to stack slots relative | ||||
// to one another and simplify frame index references where possible. | // to one another and simplify frame index references where possible. | ||||
addPass(&LocalStackSlotAllocationID); | addPass(&LocalStackSlotAllocationID); | ||||
Show All 22 Lines |