Index: lib/Target/X86/X86.h
===================================================================
--- lib/Target/X86/X86.h
+++ lib/Target/X86/X86.h
@@ -58,8 +58,8 @@
 /// to eliminate execution delays in some Atom processors.
 FunctionPass *createX86FixupLEAs();
 
-/// createX86OptimizeLEAs() - Return a pass that removes redundant
-/// address recalculations.
+/// createX86OptimizeLEAs() - Return a pass that removes redundant LEA
+/// instructions and redundant address recalculations.
 FunctionPass *createX86OptimizeLEAs();
 
 /// createX86CallFrameOptimization - Return a pass that optimizes
Index: lib/Target/X86/X86OptimizeLEAs.cpp
===================================================================
--- lib/Target/X86/X86OptimizeLEAs.cpp
+++ lib/Target/X86/X86OptimizeLEAs.cpp
@@ -9,8 +9,10 @@
 //
 // This file defines the pass that performs some optimizations with LEA
 // instructions in order to improve code size.
-// Currently, it does one thing:
-// 1) Address calculations in load and store instructions are replaced by
+// Currently, it does two things:
+// 1) If there are two LEA instructions calculating addresses which only differ
+//    by displacement inside a basic block, one of them is removed.
+// 2) Address calculations in load and store instructions are replaced by
 //    existing LEA def registers where possible.
 //
 //===----------------------------------------------------------------------===//
@@ -34,6 +36,7 @@
 #define DEBUG_TYPE "x86-optimize-LEAs"
 
 STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
+STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
 
 namespace {
 class OptimizeLEAPass : public MachineFunctionPass {
@@ -66,6 +69,10 @@
   /// \brief Returns true if the instruction is LEA.
   bool isLEA(const MachineInstr &MI);
 
+  /// \brief Returns true if 'Last' LEA instruction can be replaced by 'First'.
+  bool isReplaceable(const MachineInstr &First, const MachineInstr &Last,
+                     int64_t &AddrDispShift);
+
   /// \brief Returns true if two instructions have memory operands that only
   /// differ by displacement. The numbers of the first memory operands for both
   /// instructions are specified through N1 and N2. The address displacement is
@@ -81,6 +88,9 @@
   /// \brief Removes redundant address calculations.
   bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List);
 
+  /// \brief Removes LEAs which calculate similar addresses.
+  bool removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List);
+
   MachineRegisterInfo *MRI;
   const X86InstrInfo *TII;
   const X86RegisterInfo *TRI;
@@ -180,6 +190,68 @@
          Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
 }
 
+// Check that the 'Last' LEA can be replaced by the 'First' LEA. For that,
+// these requirements must be met:
+// 1) Addresses calculated by the LEAs differ only by displacement.
+// 2) Def registers of the LEAs belong to the same class.
+// 3) All uses of the 'Last' LEA def register are replaceable, i.e. the
+//    register is used only as an address base.
+bool OptimizeLEAPass::isReplaceable(const MachineInstr &First,
+                                    const MachineInstr &Last,
+                                    int64_t &AddrDispShift) {
+  assert(isLEA(First) && isLEA(Last) &&
+         "The function works only with LEA instructions");
+
+  // Compare instructions' memory operands.
+  if (!isSimilarMemOp(Last, 1, First, 1, AddrDispShift))
+    return false;
+
+  // Make sure that LEA def registers belong to the same class. There may be
+  // instructions (like MOV8mr_NOREX) which allow a limited set of registers to
+  // be used as their operands, so we must be sure that replacing one LEA
+  // with another won't lead to putting a wrong register in the instruction.
+  if (MRI->getRegClass(First.getOperand(0).getReg()) !=
+      MRI->getRegClass(Last.getOperand(0).getReg()))
+    return false;
+
+  // Loop over all uses of the 'Last' LEA to check that its def register is
+  // used only as an address base for memory accesses. If so, it can be
+  // replaced; otherwise it cannot.
+  assert(First.getOperand(0).isDef() && First.getOperand(0).isReg() &&
+         Last.getOperand(0).isDef() && Last.getOperand(0).isReg() &&
+         "The first LEA operand must be a def register");
+  for (auto &MO : MRI->use_operands(Last.getOperand(0).getReg())) {
+    MachineInstr &MI = *MO.getParent();
+
+    // Get the number of the first memory operand.
+    const MCInstrDesc &Desc = MI.getDesc();
+    int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode());
+
+    // If the use instruction has no memory operand, the LEA is not replaceable.
+    if (MemOpNo < 0)
+      return false;
+
+    MemOpNo += X86II::getOperandBias(Desc);
+
+    // If the LEA def register is used as anything other than the address
+    // base, or in more than one operand, the LEA is not replaceable.
+    if (!isIdenticalOp(MI.getOperand(MemOpNo + X86::AddrBaseReg), MO))
+      return false;
+    for (unsigned i = 0; i < MI.getNumOperands(); i++)
+      if (i != (unsigned)(MemOpNo + X86::AddrBaseReg) &&
+          isIdenticalOp(MI.getOperand(i), MO))
+        return false;
+
+    // Check that the new address displacement will fit in 4 bytes.
+    if (MI.getOperand(MemOpNo + X86::AddrDisp).isImm() &&
+        !isInt<32>(MI.getOperand(MemOpNo + X86::AddrDisp).getImm() +
+                   AddrDispShift))
+      return false;
+  }
+
+  return true;
+}
+
 // Check if MI1 and MI2 have memory operands which represent addresses that
 // differ only by displacement.
 bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1,
@@ -287,6 +359,78 @@
   return Changed;
 }
 
+// Try to find similar LEAs in the list and replace one with another.
+bool
+OptimizeLEAPass::removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List) {
+  bool Changed = false;
+
+  // Loop over all LEA pairs.
+  auto I1 = List.begin();
+  while (I1 != List.end()) {
+    MachineInstr &First = **I1;
+    auto I2 = std::next(I1);
+    while (I2 != List.end()) {
+      MachineInstr &Last = **I2;
+      int64_t AddrDispShift;
+
+      // LEAs should be in occurrence order in the list, so we can freely
+      // replace later LEAs with earlier ones.
+      assert(calcInstrDist(First, Last) > 0 &&
+             "LEAs must be in occurrence order in the list");
+
+      // Check that the 'Last' LEA instruction can be replaced by the 'First'.
+      if (!isReplaceable(First, Last, AddrDispShift)) {
+        ++I2;
+        continue;
+      }
+
+      // Loop over all uses of the 'Last' LEA and update their operands. Note
+      // that the correctness of this has already been checked in the
+      // isReplaceable function.
+      for (auto UI = MRI->use_begin(Last.getOperand(0).getReg()),
+                UE = MRI->use_end();
+           UI != UE;) {
+        MachineOperand &MO = *UI++;
+        MachineInstr &MI = *MO.getParent();
+
+        // Get the number of the first memory operand.
+        const MCInstrDesc &Desc = MI.getDesc();
+        int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) +
+                      X86II::getOperandBias(Desc);
+
+        // Update the address base.
+        MO.setReg(First.getOperand(0).getReg());
+
+        // Update the address displacement.
+        MachineOperand *Op = &MI.getOperand(MemOpNo + X86::AddrDisp);
+        if (Op->isImm())
+          Op->setImm(Op->getImm() + AddrDispShift);
+        else if (Op->isGlobal())
+          Op->setOffset(Op->getOffset() + AddrDispShift);
+      }
+
+      // Since we can possibly extend the register lifetime, clear kill flags.
+      MRI->clearKillFlags(First.getOperand(0).getReg());
+
+      ++NumRedundantLEAs;
+      DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: "; Last.dump(););
+
+      // By this point, all uses of the 'Last' LEA have been replaced, so we
+      // can freely remove it.
+      assert(MRI->use_empty(Last.getOperand(0).getReg()));
+      Last.eraseFromParent();
+
+      // Erase the removed LEA from the list.
+      I2 = List.erase(I2);
+
+      Changed = true;
+    }
+    ++I1;
+  }
+
+  return Changed;
+}
+
 bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
   bool OptSize = MF.getFunction()->optForSize();
@@ -311,6 +455,11 @@
     if (LEAs.empty())
       continue;
 
+    // Remove redundant LEA instructions. The optimization may have a negative
+    // effect on performance, so do it only for -Oz.
+    if (MinSize)
+      Changed |= removeRedundantLEAs(LEAs);
+
     // Remove redundant address calculations.
     Changed |= removeRedundantAddrCalc(LEAs);
   }
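The following hand-written sketch (an illustration added for this write-up, not part of the patch; the registers, scale and displacement are made up) shows the kind of rewrite removeRedundantLEAs performs when two LEAs in one basic block compute addresses that differ only by a constant displacement:

    # Before: the two LEA addresses differ only by a constant displacement
    # (AddrDispShift = 4), and %rcx is used only as an address base.
    leaq    arr(,%rdi,8), %rax
    leaq    arr+4(,%rdi,8), %rcx
    movl    (%rax), %edx
    movl    (%rcx), %esi

    # After: the later LEA is removed; its use is rewritten to the earlier
    # LEA's def register, with AddrDispShift folded into the displacement.
    leaq    arr(,%rdi,8), %rax
    movl    (%rax), %edx
    movl    4(%rax), %esi
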
Index: test/CodeGen/X86/lea-opt-1.ll
===================================================================
--- test/CodeGen/X86/lea-opt-1.ll
+++ test/CodeGen/X86/lea-opt-1.ll
@@ -83,3 +83,41 @@
 ; CHECK: movl ${{[1-4]+}}, ([[REG2]])
 ; CHECK: movl ${{[1-4]+}}, ([[REG3]])
 }
+
+define void @test3(i64 %x) nounwind minsize {
+entry:
+  %a = getelementptr inbounds [65 x %struct.anon], [65 x %struct.anon]* @arr, i64 0, i64 %x, i32 0
+  %0 = load i32, i32* %a, align 4
+  %b = getelementptr inbounds [65 x %struct.anon], [65 x %struct.anon]* @arr, i64 0, i64 %x, i32 1
+  %1 = load i32, i32* %b, align 4
+  %sub = sub i32 %0, %1
+  %c = getelementptr inbounds [65 x %struct.anon], [65 x %struct.anon]* @arr, i64 0, i64 %x, i32 2
+  %2 = load i32, i32* %c, align 4
+  %add = add nsw i32 %sub, %2
+  switch i32 %add, label %sw.epilog [
+    i32 1, label %sw.bb.1
+    i32 2, label %sw.bb.2
+  ]
+
+sw.bb.1:
+  store i32 111, i32* %b, align 4
+  store i32 222, i32* %c, align 4
+  br label %sw.epilog
+
+sw.bb.2:
+  store i32 333, i32* %b, align 4
+  store i32 444, i32* %c, align 4
+  br label %sw.epilog
+
+sw.epilog:
+  ret void
+; CHECK-LABEL: test3:
+; CHECK: leaq arr+4({{.*}}), [[REG2:%[a-z]+]]
+; CHECK: movl -4([[REG2]]), {{.*}}
+; CHECK: subl ([[REG2]]), {{.*}}
+; CHECK: addl 4([[REG2]]), {{.*}}
+; CHECK: movl ${{[1-4]+}}, ([[REG2]])
+; CHECK: movl ${{[1-4]+}}, 4([[REG2]])
+; CHECK: movl ${{[1-4]+}}, ([[REG2]])
+; CHECK: movl ${{[1-4]+}}, 4([[REG2]])
+}
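As a usage note (a sketch under assumptions, not part of the patch): in an assertions-enabled build, the statistic added above can confirm that the -Oz-only path fired. Assuming the test file is fed directly to llc with an x86-64 triple (its actual RUN line is not shown in this diff), something like:

    llc -mtriple=x86_64-linux -stats lea-opt-1.ll -o /dev/null
    # Expected among the printed statistics (with N > 0):
    #   N x86-optimize-LEAs - Number of redundant LEA instructions removed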