Index: lib/Target/NVPTX/NVPTXPeephole.cpp =================================================================== --- lib/Target/NVPTX/NVPTXPeephole.cpp +++ lib/Target/NVPTX/NVPTXPeephole.cpp @@ -22,7 +22,7 @@ // This peephole pass optimizes these cases, for example // // It will transform the following pattern -// %vreg0 = LEA_ADDRi64 , 4 +// %vreg0 = LEA_ADDRi64 %VRFrame, 4 // %vreg1 = cvta_to_local_yes_64 %vreg0 // // into @@ -36,7 +36,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -96,7 +95,7 @@ // Check the LEA_ADDRi operand is Frame index auto &BaseAddrOp = GenericAddrDef->getOperand(1); - if (BaseAddrOp.getType() == MachineOperand::MO_FrameIndex) { + if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) { return true; } @@ -110,16 +109,11 @@ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); - // Get the correct offset - int FrameIndex = Prev.getOperand(1).getIndex(); - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - Prev.getOperand(2).getImm(); - MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()), Root.getOperand(0).getReg()) .addReg(NVPTX::VRFrameLocal) - .addOperand(MachineOperand::CreateImm(Offset)); + .addOperand(Prev.getOperand(2)); MBB.insert((MachineBasicBlock::iterator)&Root, MIB); @@ -145,6 +139,15 @@ } } // Instruction } // Basic Block + + // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal + const auto &MRI = MF.getRegInfo(); + if (MRI.use_empty(NVPTX::VRFrame)) { + if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) { + MI->eraseFromParentAndMarkDBGValuesForRemoval(); + } + } + return Changed; } Index: lib/Target/NVPTX/NVPTXTargetMachine.cpp 
=================================================================== --- lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -205,13 +205,15 @@ if (!ST.hasImageHandles()) addPass(createNVPTXReplaceImageHandlesPass()); - addPass(createNVPTXPeephole()); - return false; } void NVPTXPassConfig::addPostRegAlloc() { addPass(createNVPTXPrologEpilogPass(), false); + // NVPTXPrologEpilogPass calculates frame object offset and replaces frame + // index with VRFrame register. NVPTXPeephole needs to be run after that and + // will replace VRFrame with VRFrameLocal when possible. + addPass(createNVPTXPeephole()); } FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { Index: test/CodeGen/NVPTX/local-stack-frame.ll =================================================================== --- test/CodeGen/NVPTX/local-stack-frame.ll +++ test/CodeGen/NVPTX/local-stack-frame.ll @@ -59,10 +59,16 @@ ; PTX32: cvta.local.u32 %SP, %SPL; ; PTX32: add.u32 {{%r[0-9]+}}, %SP, 0; +; PTX32: add.u32 {{%r[0-9]+}}, %SPL, 0; +; PTX32: add.u32 {{%r[0-9]+}}, %SP, 4; +; PTX32: add.u32 {{%r[0-9]+}}, %SPL, 4; ; PTX32: st.local.u32 [{{%r[0-9]+}}], {{%r[0-9]+}} ; PTX32: st.local.u32 [{{%r[0-9]+}}], {{%r[0-9]+}} ; PTX64: cvta.local.u64 %SP, %SPL; ; PTX64: add.u64 {{%rd[0-9]+}}, %SP, 0; +; PTX64: add.u64 {{%rd[0-9]+}}, %SPL, 0; +; PTX64: add.u64 {{%rd[0-9]+}}, %SP, 4; +; PTX64: add.u64 {{%rd[0-9]+}}, %SPL, 4; ; PTX64: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}} ; PTX64: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}} define void @foo4() {