Index: llvm/lib/Target/PowerPC/PPCInstrInfo.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -424,6 +424,14 @@
   bool convertToImmediateForm(MachineInstr &MI,
                               MachineInstr **KilledDef = nullptr) const;
 
+  bool foldFrameOffset(MachineInstr &MI) const;
+  bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const;
+  bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const;
+  bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &ToBeDeletedReg,
+                                    unsigned &XFormOpcode,
+                                    int64_t &OffsetImm,
+                                    ImmInstrInfo &III) const;
+
   /// Fixup killed/dead flag for register \p RegNo between instructions [\p
   /// StartMI, \p EndMI]. Some PostRA transformations may violate register
   /// killed/dead flags semantics, this function can be called to fix up. Before
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2536,6 +2536,247 @@
          "RegNo should be killed or dead");
 }
 
+// This opt tries to convert following imm form to index form to save add for
+// stack variables.
+// Return false if no such pattern found.
+//
+// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
+// ADD instr:  ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
+// Imm instr:  Reg            = op OffsetImm, ToBeDeletedReg(killed)
+//
+// can be converted to:
+//
+// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
+// Index instr:    Reg            = opx ScaleReg, ToBeChangedReg(killed)
+//
+// In order to eliminate ADD instr, make sure that:
+// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
+//    new ADDI instr and ADDI can only take int16 Imm.
+// 2: ToBeChangedReg must be killed in ADD instr and there is no other use
+//    between ADDI and ADD instr since its original def in ADDI will be changed
+//    in new ADDI instr. And also there should be no new def for it between
+//    ADD and Imm instr as ToBeChangedReg will be used in Index instr.
+// 3: ToBeDeletedReg register must be killed in Imm instr and there is no other
+//    use between ADD and Imm instr since ADD instr will be eliminated.
+// 4: ADD instr must be destructive, i.e. ToBeDeletedReg must also be one of
+//    its two source registers (ToBeChangedReg or ScaleReg).
+// 5: There must be no new def for ScaleReg between ADD and Imm instr since
+//    Index instr reads ScaleReg at the position of Imm instr.
+// 6: ScaleReg must not be R0/X0 since it replaces the immediate operand, and
+//    R0/X0 may be encoded as literal zero there instead of as the register.
+bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {
+  MachineFunction *MF = MI.getParent()->getParent();
+  MachineRegisterInfo *MRI = &MF->getRegInfo();
+  bool PostRA = !MRI->isSSA();
+  // Do this opt after PEI which is after RA. The reason is stack slot expansion
+  // in PEI may expose such opportunities since in PEI, stack slot offsets to
+  // frame base(OffsetAddi) are determined.
+  if (!PostRA)
+    return false;
+  unsigned ToBeDeletedReg = 0;
+  int64_t OffsetImm = 0;
+  unsigned XFormOpcode = 0;
+  ImmInstrInfo III;
+
+  // Check if Imm instr meets requirement.
+  if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
+                                    III))
+    return false;
+
+  bool OtherIntermediateUse = false;
+  MachineInstr *ADDMI =
+      getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
+
+  // Exit if there is other use between ADD and Imm instr or no def found.
+  if (OtherIntermediateUse || !ADDMI)
+    return false;
+
+  // Check if ADD instr meets requirement.
+  if (!isADDInstrEligibleForFolding(*ADDMI))
+    return false;
+
+  unsigned ScaleRegIdx = 0;
+  int64_t OffsetAddi = 0;
+  MachineInstr *ADDIMI = nullptr;
+
+  // Either explicit source operand of ADD may be ToBeChangedReg; the other one
+  // is then ScaleReg.
+  for (unsigned Idx = 1; Idx <= 2; ++Idx) {
+    const MachineOperand &Src = ADDMI->getOperand(Idx);
+    // ToBeChangedReg must be killed in ADD instr.
+    if (!Src.isKill())
+      continue;
+
+    OtherIntermediateUse = false;
+    MachineInstr *DefMI =
+        getDefMIPostRA(Src.getReg(), *ADDMI, OtherIntermediateUse);
+    // Currently handle only one "add + Imminstr" pair case, exit if other
+    // intermediate use for ToBeChangedReg found.
+    // TODO: handle the cases where there are other "add + Imminstr" pairs
+    // with same offset in Imminstr which is like:
+    //
+    // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
+    // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
+    // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
+    // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
+    // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
+    //
+    // can be converted to:
+    //
+    // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
+    //                                  (OffsetAddi + OffsetImm)
+    // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
+    // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)
+    if (OtherIntermediateUse || !DefMI)
+      continue;
+
+    // Check if ADDI instr meets requirement.
+    if (!isADDIInstrEligibleForFolding(*DefMI, OffsetAddi))
+      continue;
+
+    if (isInt<16>(OffsetAddi + OffsetImm)) {
+      // Good candidate found.
+      ADDIMI = DefMI;
+      ScaleRegIdx = Idx == 1 ? 2 : 1;
+      break;
+    }
+  }
+
+  if (!ADDIMI)
+    return false;
+
+  // Read everything needed from ADD instr up front; it is erased below.
+  unsigned ToBeChangedReg = ADDIMI->getOperand(0).getReg();
+  unsigned ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
+  bool IsScaleRegKilled = ADDMI->getOperand(ScaleRegIdx).isKill();
+
+  // ScaleReg takes the place of the immediate operand. Conservatively give up
+  // on R0/X0, which may be encoded as literal zero there instead of as the
+  // register.
+  if (ScaleReg == PPC::R0 || ScaleReg == PPC::X0)
+    return false;
+
+  // Make sure there is no other def for ToBeChangedReg or ScaleReg between ADD
+  // instr and Imm instr: Index instr reads both registers at the position of
+  // Imm instr.
+  MachineBasicBlock::iterator E = MI;
+  MachineBasicBlock::iterator It = *ADDMI;
+  for (++It; It != E; ++It)
+    if (It->modifiesRegister(ToBeChangedReg, &getRegisterInfo()) ||
+        It->modifiesRegister(ScaleReg, &getRegisterInfo()))
+      return false;
+
+  // Now start to do the transformation.
+  LLVM_DEBUG(dbgs() << "Replace instruction: "
+                    << "\n");
+  LLVM_DEBUG(ADDIMI->dump());
+  LLVM_DEBUG(ADDMI->dump());
+  LLVM_DEBUG(MI.dump());
+  LLVM_DEBUG(dbgs() << "with: "
+                    << "\n");
+
+  // Update ADDI instr.
+  ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
+
+  // Update Imm instr.
+  MI.setDesc(get(XFormOpcode));
+  MI.getOperand(III.ImmOpNo)
+      .ChangeToRegister(ScaleReg, false, false, IsScaleRegKilled);
+
+  MI.getOperand(III.OpNoForForwarding)
+      .ChangeToRegister(ToBeChangedReg, false, false, true);
+
+  // Eliminate ADD instr.
+  ADDMI->eraseFromParent();
+
+  LLVM_DEBUG(ADDIMI->dump());
+  LLVM_DEBUG(MI.dump());
+
+  return true;
+}
+
+bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
+                                                 int64_t &Imm) const {
+  unsigned Opc = ADDIMI.getOpcode();
+
+  // Exit if the instruction is not ADDI.
+  if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
+    return false;
+
+  // The offset operand is not necessarily an immediate (it could be a
+  // relocation slot or others); getImm() is only valid on immediates.
+  if (!ADDIMI.getOperand(2).isImm())
+    return false;
+
+  Imm = ADDIMI.getOperand(2).getImm();
+
+  return true;
+}
+
+bool PPCInstrInfo::isADDInstrEligibleForFolding(MachineInstr &ADDMI) const {
+  unsigned Opc = ADDMI.getOpcode();
+
+  // Exit if the instruction is not ADD.
+  if (Opc != PPC::ADD4 && Opc != PPC::ADD8)
+    return false;
+
+  int UseOpIdx = ADDMI.findRegisterUseOperandIdx(ADDMI.getOperand(0).getReg(),
+                                                 false, &getRegisterInfo());
+
+  // Only do this for destructive ADDMI.
+  if (UseOpIdx == -1)
+    return false;
+
+  assert((UseOpIdx == 1 || UseOpIdx == 2) &&
+         "wrong operand index of add instruction");
+
+  return true;
+}
+
+bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI,
+                                                unsigned &ToBeDeletedReg,
+                                                unsigned &XFormOpcode,
+                                                int64_t &OffsetImm,
+                                                ImmInstrInfo &III) const {
+  // Only handle load/store.
+  if (!MI.mayLoadOrStore())
+    return false;
+
+  // Exit if instruction has no index form.
+  const auto &ImmToIdxMap = RI.getImmToIdxMap();
+  auto XFormIt = ImmToIdxMap.find(MI.getOpcode());
+  if (XFormIt == ImmToIdxMap.end())
+    return false;
+
+  XFormOpcode = XFormIt->second;
+
+  bool HasImmForm = instrHasImmForm(
+      XFormOpcode, isVFRegister(MI.getOperand(0).getReg()), III, true);
+
+  // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
+  if (!HasImmForm)
+    return false;
+
+  if (!III.IsSummingOperands)
+    return false;
+
+  // Only support imm operands, not relocation slots or others.
+  if (!MI.getOperand(III.ImmOpNo).isImm())
+    return false;
+
+  assert(MI.getOperand(III.OpNoForForwarding).isReg() &&
+         "Instruction format is not right");
+
+  // There are other use for ToBeDeletedReg after Imm instr, can not delete it.
+  if (!MI.getOperand(III.OpNoForForwarding).isKill())
+    return false;
+
+  ToBeDeletedReg = MI.getOperand(III.OpNoForForwarding).getReg();
+  OffsetImm = MI.getOperand(III.ImmOpNo).getImm();
+
+  return true;
+}
+
 // If this instruction has an immediate form and one of its operands is a
 // result of a load-immediate or an add-immediate, convert it to
 // the immediate form if the constant is in range.
Index: llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -35,6 +35,8 @@
           "Number of instructions deleted in pre-emit peephole");
 STATISTIC(NumberOfSelfCopies,
           "Number of self copy instructions eliminated");
+STATISTIC(NumFrameOffFoldInPreEmit,
+          "Number of folding frame offset by using r+r in pre-emit peephole");
 
 static cl::opt<bool>
 RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
@@ -201,6 +203,12 @@
             InstrsToErase.push_back(DefMIToErase);
           }
         }
+        if (TII->foldFrameOffset(MI)) {
+          Changed = true;
+          ++NumFrameOffFoldInPreEmit;
+          LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
+          LLVM_DEBUG(MI.dump());
+        }
       }
 
       // Eliminate conditional branch based on a constant CR bit by
Index: llvm/lib/Target/PowerPC/PPCRegisterInfo.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -61,6 +61,12 @@
 public:
   PPCRegisterInfo(const PPCTargetMachine &TM);
 
+  /// getImmToIdxMap - Return the ImmToIdxMap which contains mapping between
+  /// dform instructions and xform instructions.
+  const DenseMap<unsigned, unsigned> &getImmToIdxMap() const {
+    return ImmToIdxMap;
+  }
+
   /// getPointerRegClass - Return the register class to use to hold pointers.
   /// This is used for addressing modes.
   const TargetRegisterClass *
Index: llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir
===================================================================
--- llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir
+++ llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir
@@ -18,11 +18,11 @@
   bb.0.entry:
     liveins: $x3, $x1, $x4, $x6
     $x3 = ADDI8 $x1, -80
-    ; CHECK: $x3 = ADDI8 $x1, -80
+    ; CHECK: $x3 = ADDI8 $x1, -76
     $x4 = ADD8 killed $x3, killed $x4
-    ; CHECK: $x4 = ADD8 killed $x3, killed $x4
+    ; CHECK-NOT: ADD8
     $x6 = LD 4, killed $x4
-    ; CHECK: $x6 = LD 4, killed $x4
+    ; CHECK: $x6 = LDX killed $x4, killed $x3
     BLR8 implicit $lr8, implicit $rm
 ...
 ---
@@ -34,11 +34,11 @@
   bb.0.entry:
     liveins: $x3, $x1, $x4, $x6
     $x3 = ADDI8 $x1, -80
-    ; CHECK: $x3 = ADDI8 $x1, -80
+    ; CHECK: $x3 = ADDI8 $x1, -76
     $x3 = ADD8 killed $x3, killed $x4
-    ; CHECK: $x3 = ADD8 killed $x3, killed $x4
+    ; CHECK-NOT: ADD8
     $x6 = LD 4, killed $x3
-    ; CHECK: $x6 = LD 4, killed $x3
+    ; CHECK: $x6 = LDX killed $x4, killed $x3
     BLR8 implicit $lr8, implicit $rm
 ...
 ---