Index: lib/Target/NVPTX/NVPTXTargetMachine.cpp =================================================================== --- lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -167,9 +167,10 @@ disablePass(&TailDuplicateID); addPass(createNVPTXImageOptimizerPass()); - TargetPassConfig::addIRPasses(); addPass(createNVPTXAssignValidGlobalNamesPass()); addPass(createGenericToNVVMPass()); + + // === Propagate special address spaces === addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); // NVPTXLowerKernelArgs emits alloca for byval parameters which can often // be eliminated by SROA. @@ -180,6 +181,8 @@ // them unused. We could remove dead code in an ad-hoc manner, but that // requires manual work and might be error-prone. addPass(createDeadCodeEliminationPass()); + + // === Straight-line scalar optimizations === addPass(createSeparateConstOffsetFromGEPPass()); addPass(createSpeculativeExecutionPass()); // ReassociateGEPs exposes more opportunites for SLSR. See @@ -197,6 +200,22 @@ // NaryReassociate on GEPs creates redundant common expressions, so run // EarlyCSE after it. addPass(createEarlyCSEPass()); + + // === LSR and other generic IR passes === + TargetPassConfig::addIRPasses(); + // EarlyCSE is not strong enough to clean up what LSR produces. For example, + // GVN can combine + // + // %0 = add %a, %b + // %1 = add %b, %a + // + // and + // + // %0 = shl nsw %a, 2 + // %1 = shl %a, 2 + // + // but EarlyCSE can do neither of them. + addPass(llvm::createGVNPass()); } bool NVPTXPassConfig::addInstSelector() {