Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -272,7 +272,7 @@
   }

-  bool FoldingImm = OpToFold.isImm();
+  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();

   // In order to fold immediates into copies, we need to change the
   // copy to a MOV.
@@ -299,7 +299,7 @@
     return;
   }

-  if (!FoldingImm) {
+  if (!OpToFold.isImm()) {
     tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);

     // FIXME: We could try to change the instruction from 64-bit to 32-bit
@@ -483,6 +483,8 @@
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
@@ -553,7 +555,9 @@
            Use != E; ++Use) {
         MachineInstr *UseMI = Use->getParent();

-        if (TII->isInlineConstant(OpToFold, OpSize)) {
+        if ((OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpSize)) ||
+            (OpToFold.isFI() &&
+             TII->isFrameIndexPreAllocInlineImm(MFI, OpToFold.getIndex()))) {
           foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
                       CopiesToReplace, TII, TRI, MRI);
         } else {
Index: lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.h
+++ lib/Target/AMDGPU/SIInstrInfo.h
@@ -464,6 +464,10 @@
   // assumes that it will.
   bool isLiteralConstantLike(const MachineOperand &MO, unsigned OpSize) const;

+  /// \returns true if the object \p FI has already had its offset determined to
+  /// be a valid inline immediate value.
+  bool isFrameIndexPreAllocInlineImm(const MachineFrameInfo &MFI, int FI) const;
+
   bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                          const MachineOperand &MO) const;

Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1770,6 +1770,27 @@
   }
 }

+bool SIInstrInfo::isFrameIndexPreAllocInlineImm(const MachineFrameInfo &MFI,
+                                                int FI) const {
+  // Check if LocalStackSlotAllocation has already determined the offset for
+  // this frame index.
+  if (!MFI.getUseLocalStackAllocationBlock() || !MFI.isObjectPreAllocated(FI))
+    return false;
+
+  for (int I = 0, E = MFI.getLocalFrameObjectCount(); I != E; ++I) {
+    int ObjFI;
+    int64_t ObjOffset;
+    std::tie(ObjFI, ObjOffset) = MFI.getLocalFrameObjectMap(I);
+    if (ObjFI == FI) {
+      assert(isUInt<32>(ObjOffset));
+      return isInlineConstant(APInt(32, ObjOffset));
+    }
+  }
+
+  return false;
+}
+
 bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                                     const MachineOperand &MO) const {
   const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];
@@ -1782,9 +1803,21 @@
   if (OpInfo.RegClass < 0)
     return false;

-  unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
-  if (isLiteralConstant(MO, OpSize))
-    return RI.opCanUseLiteralConstant(OpInfo.OperandType);
+  if (MO.isFI()) {
+    assert(AMDGPU::getRegBitWidth(OpInfo.RegClass) == 32);
+
+    if (RI.opCanUseInlineConstant(OpInfo.OperandType) &&
+        isFrameIndexPreAllocInlineImm(MI.getParent()->getParent()->getFrameInfo(),
+                                      MO.getIndex()))
+      return true;
+    return RI.opCanUseLiteralConstant(OpInfo.OperandType);
+  }
+
+  unsigned OpSize = AMDGPU::getRegBitWidth(OpInfo.RegClass) / 8;
+  if (isLiteralConstant(MO, OpSize)) {
+    return RI.opCanUseLiteralConstant(OpInfo.OperandType);
+  }

   return RI.opCanUseInlineConstant(OpInfo.OperandType);
 }