Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,7 +27,6 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/FastISel.h" Index: lib/Target/X86/X86OptimizeLEAs.cpp =================================================================== --- lib/Target/X86/X86OptimizeLEAs.cpp +++ lib/Target/X86/X86OptimizeLEAs.cpp @@ -274,7 +274,7 @@ static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1, const MachineOperand &MO2) { MachineInstr *MI1 = nullptr; - MachineInstr *MI2 = nullptr; + MachineInstr *MI2 = nullptr; if (!MO1.isReg() || !MO2.isReg()) return false; @@ -324,7 +324,9 @@ } static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr) { - if (!Opr.isReg() || TargetRegisterInfo::isPhysicalRegister(Opr.getReg())) + bool isInstrErased = Opr.isReg() && Opr.getParent()->getParent() ? 0 : 1; + if (!Opr.isReg() || isInstrErased || + TargetRegisterInfo::isPhysicalRegister(Opr.getReg())) return false; MachineInstr *MI = MRI->getVRegDef(Opr.getReg()); return MI && MI->isCopyLike(); @@ -442,6 +444,14 @@ if (containsPhyReg(MI, 2)) return; + // Factorization is beneficial only for complex LEAs. + MachineOperand &Base = MI->getOperand(1); + MachineOperand &Index = MI->getOperand(3); + MachineOperand &Offset = MI->getOperand(4); + if ((Offset.isImm() && !Offset.getImm()) || + (!Base.isReg() || !Base.getReg()) || (!Index.isReg() || !Index.getReg())) + return; + MemOpKey Key = getMemOpCSEKey(*MI, 1); ScopeEntryT *TopScope = getTopScope(); @@ -732,7 +742,9 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) { bool Changed = false; - assert(!LEAs.empty()); + if(LEAs.empty()) + return Changed; + MachineBasicBlock *MBB = (*LEAs.begin()->second.begin())->getParent(); // Process all instructions in basic block. @@ -902,6 +914,10 @@ // Erase removed LEA from the list. I2 = List.erase(I2); + // If List becomes empty remove it from LEAs map. + if (List.empty()) + LEAs.erase(E.first); + Changed = true; } ++I1; @@ -934,7 +950,8 @@ auto &List = E.second; // Loop over all LEA pairs. - for (auto I1 = List.begin(); I1 != List.end(); I1++) { + auto I1 = List.begin(); + while (I1 != List.end()) { MachineInstrBuilder NewMI; MachineInstr &First = **I1; MachineOperand &Res = First.getOperand(0); @@ -946,25 +963,44 @@ const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(First.getOpcode())); const DebugLoc DL = First.getDebugLoc(); - if (!Base.isReg() || !Index.isReg()) + if (!Base.isReg() || !Index.isReg() || !Index.getReg()) { + I1++; continue; + } + if (TargetRegisterInfo::isPhysicalRegister(Res.getReg()) || TargetRegisterInfo::isPhysicalRegister(Base.getReg()) || - TargetRegisterInfo::isPhysicalRegister(Index.getReg())) + TargetRegisterInfo::isPhysicalRegister(Index.getReg())) { + I1++; continue; + } + + // Check for register class compatibility between Result and + // Index operands. + const TargetRegisterClass *ResRC = MRI->getRegClass(Res.getReg()); + const TargetRegisterClass *IndexRC = MRI->getRegClass(Index.getReg()); + if (TRI->getRegSizeInBits(*ResRC) != TRI->getRegSizeInBits(*IndexRC)) { + I1++; + continue; + } MachineBasicBlock &MBB = *(const_cast(&BB)); - if (Scale.isImm() && Scale.getImm() == 1) { - // R = B + I - if (Offset.isImm() && !Offset.getImm()) { - NewMI = BuildMI(MBB, &First, DL, ADDrr) - .addDef(Res.getReg()) - .addUse(Base.getReg()) - .addUse(Index.getReg()); - Changed = NewMI.getInstr() != nullptr; - First.eraseFromParent(); - } - } + // R = B + I + if (Scale.isImm() && Scale.getImm() == 1 && Offset.isImm() && + !Offset.getImm()) { + NewMI = BuildMI(MBB, &First, DL, ADDrr) + .addDef(Res.getReg()) + .addUse(Base.getReg()) + .addUse(Index.getReg()); + Changed = NewMI.getInstr() != nullptr; + First.eraseFromParent(); + I1 = List.erase(I1); + + // If List becomes empty remove it from LEAs map. + if (List.empty()) + LEAs.erase(E.first); + } else + I1++; } } return Changed; @@ -988,7 +1024,7 @@ auto &List = E.second; if (List.size() > 1) List.sort(CompareFn); - + // Loop over all LEA pairs. for (auto Iter1 = List.begin(); Iter1 != List.end(); Iter1++) { for (auto Iter2 = std::next(Iter1); Iter2 != List.end(); Iter2++) { @@ -1003,25 +1039,52 @@ assert(LI2.getOperand(0).isReg() && "Result is a VirtualReg"); DebugLoc DL = LI1.getDebugLoc(); + // Continue if instruction has already been factorized. if (FactorOpt.checkIfScheduledForRemoval(&LI1)) continue; int Factor = Scale1 - Scale2; if (Factor > 0 && LegalScale[Factor]) { + MachineOperand NewBase = LI2.getOperand(0); + MachineOperand NewIndex = LI1.getOperand(3); + + const TargetRegisterClass *LI2ResRC = + MRI->getRegClass(LI2.getOperand(0).getReg()); + const TargetRegisterClass *LI1BaseRC = + MRI->getRegClass(LI1.getOperand(1).getReg()); + + if (TRI->getRegSizeInBits(*LI1BaseRC) > + TRI->getRegSizeInBits(*LI2ResRC)) { + MachineInstr *LI1IndexDef = + MRI->getVRegDef(LI1.getOperand(3).getReg()); + if (LI1IndexDef->getOpcode() == TargetOpcode::SUBREG_TO_REG) { + MachineOperand &SubReg = LI1IndexDef->getOperand(2); + const TargetRegisterClass *SubRegRC = + MRI->getRegClass(SubReg.getReg()); + if (TRI->getRegSizeInBits(*SubRegRC) == + TRI->getRegSizeInBits(*LI2ResRC)) + NewIndex = SubReg; + else + continue; + } + } + DEBUG(dbgs() << "CSE LEAs: Candidate to replace: "; LI1.dump();); MachineInstrBuilder NewMI = BuildMI(*(const_cast(&MBB)), &LI1, DL, TII->get(LI1.getOpcode())) .addDef(LI1.getOperand(0).getReg()) // Dst = Dst of LI1. - .addUse(LI2.getOperand(0).getReg()) // Base = Dst of LI2. - .addImm(Factor) // Scale = Diff b/w scales. - .addUse(LI1.getOperand(3).getReg()) // Index = Index of LI1. - .addImm(0) // Disp = 0 + .addUse(NewBase.getReg()) // Base = Dst to LI2. + .addImm(Factor) // Scale = Diff b/w scales. + .addUse(NewIndex.getReg()) // Index = Index of LI1. + .addImm(0) // Disp = 0 .addUse( LI1.getOperand(5).getReg()); // Segment = Segmant of LI1. cseDone = NewMI.getInstr() != nullptr; + /// To preserve the SSA nature we need to remove Def flag + /// from old result. LI1.getOperand(0).setIsDef(false); /// Lazy removal shall ensure that replaced LEA remains @@ -1042,26 +1105,26 @@ bool OptimizeLEAPass::FactorizeLEAsBasicBlock(MachineDomTreeNode *DN) { bool Changed = false; - using StackT = SmallVector; - using ProcessedMapT = DenseMap; + using StackT = SmallVector; + using ProcessedMapT = DenseMap; StackT WorkList; ProcessedMapT ProcessesMap; WorkList.push_back(DN); - while(!WorkList.empty()) { - MachineDomTreeNode * MDN = WorkList.back(); + while (!WorkList.empty()) { + MachineDomTreeNode *MDN = WorkList.back(); if (ProcessesMap.find(MDN) == ProcessesMap.end()) { - ProcessesMap[MDN] = true; - FactorOpt.pushScope(MDN->getBlock()); - Changed |= processBasicBlock(*MDN->getBlock()); - for (auto Child : MDN->getChildren()) - WorkList.push_back(Child); + ProcessesMap[MDN] = true; + FactorOpt.pushScope(MDN->getBlock()); + Changed |= processBasicBlock(*MDN->getBlock()); + for (auto Child : MDN->getChildren()) + WorkList.push_back(Child); } MachineDomTreeNode *TDM = WorkList.back(); if (MDN->getLevel() == TDM->getLevel()) { - FactorOpt.popScope(); - WorkList.pop_back(); + FactorOpt.popScope(); + WorkList.pop_back(); } } return Changed; Index: test/CodeGen/X86/lea-opt-cse1.ll =================================================================== --- test/CodeGen/X86/lea-opt-cse1.ll +++ test/CodeGen/X86/lea-opt-cse1.ll @@ -11,7 +11,7 @@ ; X64-NEXT: movl 16(%rdi), %ecx ; X64-NEXT: leal 1(%rax,%rcx), %eax ; X64-NEXT: movl %eax, 12(%rdi) -; X64-NEXT: addq %rcx, %eax +; X64-NEXT: addq %ecx, %eax ; X64-NEXT: movl %eax, 16(%rdi) ; X64-NEXT: retq ; Index: test/CodeGen/X86/lea-opt-cse2.ll =================================================================== --- test/CodeGen/X86/lea-opt-cse2.ll +++ test/CodeGen/X86/lea-opt-cse2.ll @@ -25,9 +25,7 @@ ; X86-LABEL: foo: ; X86: # BB#0: # %entry ; X86-NEXT: pushl %esi -; X86-NEXT: .Lcfi0: ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .Lcfi1: ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax Index: test/CodeGen/X86/lea-opt-cse3.ll =================================================================== --- test/CodeGen/X86/lea-opt-cse3.ll +++ test/CodeGen/X86/lea-opt-cse3.ll @@ -8,7 +8,7 @@ ; X64-NEXT: # kill: %ESI %ESI %RSI ; X64-NEXT: # kill: %EDI %EDI %RDI ; X64-NEXT: leal 4(%rdi,%rsi,2), %ecx -; X64-NEXT: leal (%ecx,%rsi,2), %eax +; X64-NEXT: leal (%ecx,%esi,2), %eax ; X64-NEXT: imull %ecx, %eax ; X64-NEXT: retq ; @@ -36,7 +36,7 @@ ; X64-NEXT: # kill: %ESI %ESI %RSI ; X64-NEXT: # kill: %EDI %EDI %RDI ; X64-NEXT: leal 4(%rdi,%rsi,4), %ecx -; X64-NEXT: leal (%ecx,%rsi,4), %eax +; X64-NEXT: leal (%ecx,%esi,4), %eax ; X64-NEXT: imull %ecx, %eax ; X64-NEXT: retq ; @@ -68,7 +68,7 @@ ; X64-NEXT: cmpl $10, %ecx ; X64-NEXT: je .LBB2_2 ; X64-NEXT: # BB#1: # %mid -; X64-NEXT: leal (%ecx,%rsi,4), %eax +; X64-NEXT: leal (%ecx,%esi,4), %eax ; X64-NEXT: imull %ecx, %eax ; X64-NEXT: .LBB2_2: # %exit ; X64-NEXT: retq @@ -123,9 +123,7 @@ ; X86-LABEL: foo1_mult_basic_blocks_illegal_scale: ; X86: # BB#0: # %entry ; X86-NEXT: pushl %esi -; X86-NEXT: .Lcfi0: ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .Lcfi1: ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi Index: test/CodeGen/X86/lea-opt-cse4.ll =================================================================== --- test/CodeGen/X86/lea-opt-cse4.ll +++ test/CodeGen/X86/lea-opt-cse4.ll @@ -18,9 +18,6 @@ ; ; X86-LABEL: foo: ; X86: # BB#0: # %entry -; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl (%eax), %ecx ; X86-NEXT: movl 16(%eax), %edx @@ -76,9 +73,7 @@ ; X86-LABEL: foo_loop: ; X86: # BB#0: # %entry ; X86-NEXT: pushl %esi -; X86-NEXT: .Lcfi0: ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .Lcfi1: ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax Index: test/CodeGen/X86/pr34629.ll =================================================================== --- test/CodeGen/X86/pr34629.ll +++ test/CodeGen/X86/pr34629.ll @@ -13,8 +13,8 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: movq {{.*}}(%rip), %rax ; CHECK-NEXT: leaq (%rax,%rax,4), %rcx -; CHECK-NEXT: leaq (%rcx,%rax,4), %rax ; CHECK-NEXT: negq %rcx +; CHECK-NEXT: leaq (%rax,%rax,8), %rax ; CHECK-NEXT: leaq (%rax,%rax,4), %rax ; CHECK-NEXT: testq %rax, %rcx ; CHECK-NEXT: je .LBB0_2 Index: test/CodeGen/X86/pr34634.ll =================================================================== --- test/CodeGen/X86/pr34634.ll +++ test/CodeGen/X86/pr34634.ll @@ -14,11 +14,11 @@ ; CHECK-NEXT: movslq {{.*}}(%rip), %rax ; CHECK-NEXT: leaq (%rax,%rax,4), %rcx ; CHECK-NEXT: leaq (,%rax,4), %rdx -; CHECK-NEXT: movl a(%rdx,%rcx,8), %edx -; CHECK-NEXT: leaq (%rcx,%rax,4), %rcx -; CHECK-NEXT: leaq (%rcx,%rcx,2), %rcx -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: movl %edx, b(%rcx,%rax,4) +; CHECK-NEXT: movl a(%rdx,%rcx,8), %ecx +; CHECK-NEXT: leaq (%rax,%rax,8), %rdx +; CHECK-NEXT: leaq (%rdx,%rdx,2), %rdx +; CHECK-NEXT: addq %rax, %rdx +; CHECK-NEXT: movl %ecx, b(%rdx,%rax,4) ; CHECK-NEXT: retq entry: %0 = load i32, i32* @c, align 4, !tbaa !2 @@ -37,11 +37,11 @@ ; CHECK-NEXT: movslq {{.*}}(%rip), %rax ; CHECK-NEXT: leaq (%rax,%rax,4), %rcx ; CHECK-NEXT: leaq (,%rax,4), %rdx -; CHECK-NEXT: movl a(%rdx,%rcx,8), %edx -; CHECK-NEXT: leaq (%rcx,%rax,4), %rcx -; CHECK-NEXT: leaq (%rcx,%rcx,2), %rcx -; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: movl %edx, b(%rcx,%rax,4) +; CHECK-NEXT: movl a(%rdx,%rcx,8), %ecx +; CHECK-NEXT: leaq (%rax,%rax,8), %rdx +; CHECK-NEXT: leaq (%rdx,%rdx,2), %rdx +; CHECK-NEXT: addq %rax, %rdx +; CHECK-NEXT: movl %ecx, b(%rdx,%rax,4) ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: retq entry: