Index: include/llvm/CodeGen/MachineOperand.h
===================================================================
--- include/llvm/CodeGen/MachineOperand.h
+++ include/llvm/CodeGen/MachineOperand.h
@@ -593,6 +593,9 @@
   /// ChangeToMCSymbol - Replace this operand with a new MC symbol operand.
   void ChangeToMCSymbol(MCSymbol *Sym);
 
+  /// Replace this operand with a frame index.
+  void ChangeToFrameIndex(int Idx);
+
   /// ChangeToRegister - Replace this operand with a new register operand of
   /// the specified value. If an operand is known to be an register already,
   /// the setReg method should be used.
Index: lib/CodeGen/MachineInstr.cpp
===================================================================
--- lib/CodeGen/MachineInstr.cpp
+++ lib/CodeGen/MachineInstr.cpp
@@ -175,6 +175,16 @@
   Contents.Sym = Sym;
 }
 
+void MachineOperand::ChangeToFrameIndex(int Idx) {
+  assert((!isReg() || !isTied()) &&
+         "Cannot change a tied operand into a FrameIndex");
+
+  removeRegFromUses();
+
+  OpKind = MO_FrameIndex;
+  setIndex(Idx);
+}
+
 /// ChangeToRegister - Replace this operand with a new register operand of
 /// the specified value. If an operand is known to be an register already,
 /// the setReg method should be used.
Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -48,24 +48,36 @@
 
 struct FoldCandidate {
   MachineInstr *UseMI;
-  unsigned UseOpNo;
-  MachineOperand *OpToFold;
-  uint64_t ImmToFold;
+  union {
+    MachineOperand *OpToFold;
+    uint64_t ImmToFold;
+    int FrameIndexToFold;
+  };
+  unsigned char UseOpNo;
+  MachineOperand::MachineOperandType Kind;
 
   FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
-                UseMI(MI), UseOpNo(OpNo) {
-
+    UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
     if (FoldOp->isImm()) {
-      OpToFold = nullptr;
       ImmToFold = FoldOp->getImm();
+    } else if (FoldOp->isFI()) {
+      FrameIndexToFold = FoldOp->getIndex();
     } else {
       assert(FoldOp->isReg());
       OpToFold = FoldOp;
     }
   }
 
+  bool isFI() const {
+    return Kind == MachineOperand::MO_FrameIndex;
+  }
+
   bool isImm() const {
-    return !OpToFold;
+    return Kind == MachineOperand::MO_Immediate;
+  }
+
+  bool isReg() const {
+    return Kind == MachineOperand::MO_Register;
   }
 };
 
@@ -107,6 +119,11 @@
     return true;
   }
 
+  if (Fold.isFI()) {
+    Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+    return true;
+  }
+
   MachineOperand *New = Fold.OpToFold;
   if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
       TargetRegisterInfo::isVirtualRegister(New->getReg())) {
@@ -328,7 +345,7 @@
 
       unsigned OpSize = TII->getOpSize(MI, 1);
       MachineOperand &OpToFold = MI.getOperand(1);
-      bool FoldingImm = OpToFold.isImm();
+      bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
 
       // FIXME: We could also be folding things like FrameIndexes and
       // TargetIndexes.
@@ -380,7 +397,7 @@
       for (FoldCandidate &Fold : FoldList) {
         if (updateOperand(Fold, TRI)) {
           // Clear kill flags.
-          if (!Fold.isImm()) {
+          if (Fold.isReg()) {
             assert(Fold.OpToFold && Fold.OpToFold->isReg());
             // FIXME: Probably shouldn't bother trying to fold if not an
             // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
Index: test/CodeGen/AMDGPU/local-stack-slot-bug.ll
===================================================================
--- test/CodeGen/AMDGPU/local-stack-slot-bug.ll
+++ test/CodeGen/AMDGPU/local-stack-slot-bug.ll
@@ -7,10 +7,8 @@
 ;
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
-; CHECK: v_mov_b32_e32 [[HI_CONST:v[0-9]+]], 0x200
-; CHECK: v_mov_b32_e32 [[LO_CONST:v[0-9]+]], 0
-; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, [[BYTES]], [[HI_CONST]]
-; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BYTES]], [[LO_CONST]]
+; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
+; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
 define amdgpu_ps float @main(i32 %idx) {