Index: lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- lib/Target/X86/X86ISelDAGToDAG.cpp +++ lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1410,6 +1410,19 @@ return false; } + if (AM.BaseType == X86ISelAddressMode::RegBase) { + if (AM.Base_Reg == N) { + SDValue Base_Reg = AM.Base_Reg; + AM.Base_Reg = AM.IndexReg; + AM.IndexReg = Base_Reg; + AM.Scale = 2; + return false; + } else if (AM.IndexReg == N) { + AM.Scale = 2; + return false; + } + } + // Otherwise, we cannot select it. return true; } Index: lib/Target/X86/X86OptimizeLEAs.cpp =================================================================== --- lib/Target/X86/X86OptimizeLEAs.cpp +++ lib/Target/X86/X86OptimizeLEAs.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -65,8 +66,8 @@ public: MemOpKey(const MachineOperand *Base, const MachineOperand *Scale, const MachineOperand *Index, const MachineOperand *Segment, - const MachineOperand *Disp) - : Disp(Disp) { + const MachineOperand *Disp, bool DispCheck = false) + : Disp(Disp), HardDispCheck(DispCheck) { Operands[0] = Base; Operands[1] = Scale; Operands[2] = Index; @@ -82,8 +83,10 @@ // Addresses' displacements don't have to be exactly the same. It only // matters that they use the same symbol/index/address. Immediates' or // offsets' differences will be taken care of during instruction - // substitution. - return isSimilarDispOp(*Disp, *Other.Disp); + // substitution. If HardDispCheck is true then Disp must be identical. + if (!HardDispCheck) + return isSimilarDispOp(*Disp, *Other.Disp); + return isIdenticalOp(*Disp,*Other.Disp); } // Address' base, scale, index and segment operands. @@ -91,6 +94,9 @@ // Address' displacement operand. const MachineOperand *Disp; + + // Forces absolute displacement check. + bool HardDispCheck; }; } // end anonymous namespace @@ -178,6 +184,17 @@ &MI.getOperand(N + X86::AddrDisp)); } +static inline MemOpKey getMemOpCSEKey(const MachineInstr &MI, unsigned N) { + static MachineOperand DummyScale = MachineOperand::CreateImm(1); + assert((isLEA(MI) || MI.mayLoadOrStore()) && + "The instruction must be a LEA, a load or a store"); + return MemOpKey(&MI.getOperand(N + X86::AddrBaseReg), + &DummyScale, + &MI.getOperand(N + X86::AddrIndexReg), + &MI.getOperand(N + X86::AddrSegmentReg), + &MI.getOperand(N + X86::AddrDisp), true); +} + static inline bool isIdenticalOp(const MachineOperand &MO1, const MachineOperand &MO2) { return MO1.isIdenticalTo(MO2) && @@ -228,6 +245,12 @@ /// been calculated by LEA. Also, remove redundant LEAs. bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); + } + private: typedef DenseMap> MemOpMap; @@ -261,6 +284,13 @@ /// distance between them. void findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs); + /// \brief Find all LEA instructions in the basic block that have same + /// Base, Index, Disp and Segment. + void populateCSEMap(const MachineBasicBlock &MBB, MemOpMap &LEAs); + + /// \brief Factor out LEAs which share Base,Index,Offset and Segment. + bool cseLEAs(const MachineBasicBlock &MBB); + /// \brief Removes redundant address calculations. bool removeRedundantAddrCalc(MemOpMap &LEAs); @@ -275,6 +305,7 @@ DenseMap InstrPos; + MachineDominatorTree *DT; MachineRegisterInfo *MRI; const X86InstrInfo *TII; const X86RegisterInfo *TRI; @@ -286,6 +317,13 @@ FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); } +void OptimizeLEAPass::populateCSEMap(const MachineBasicBlock &MBB, MemOpMap &LEAs) { + for (auto &MI : MBB) { + if (isLEA(MI)) + LEAs[getMemOpCSEKey(MI, 1)].push_back(const_cast(&MI)); + } +} + int OptimizeLEAPass::calcInstrDist(const MachineInstr &First, const MachineInstr &Last) { // Both instructions must be in the same basic block and they must be @@ -646,6 +684,66 @@ return Changed; } +bool OptimizeLEAPass::cseLEAs(const MachineBasicBlock &MBB) { + MemOpMap LEAs; + bool cseDone = false; + + // Legal scale value (1,2,4 & 8) vector. + int LegalScale[9] = {0,1,1,0,1,0,0,0,1}; + + populateCSEMap(MBB,LEAs); + + auto CompareFn = + [] (const MachineInstr *Arg1,const MachineInstr *Arg2) -> bool { + if(Arg1->getOperand(2).getImm() < Arg2->getOperand(2).getImm()) + return false; + return true; + }; + + // Loop over all entries in the table. + for (auto &E : LEAs) { + auto &List = E.second; + if(List.size() > 1) { + std::sort(List.begin(),List.end(),CompareFn); + } + // Loop over all LEA pairs. + for(auto LII = List.begin(); LII != List.end(); LII++) { + MachineInstr &LI1 = **LII; + auto LINext = std::next(LII); + if(LINext == List.end()) + break; + MachineInstr &LI2 = **LINext; + if (!DT->dominates(&LI2,&LI1)) + continue; + + int Scale1 = LI1.getOperand(2).getImm(); + int Scale2 = LI2.getOperand(2).getImm(); + assert(LI2.getOperand(0).isReg() && "Result is a VirtualReg"); + DebugLoc DL = LI1.getDebugLoc(); + + int Factor = Scale1 - Scale2; + if (Factor > 0 && LegalScale[Factor]) { + DEBUG(dbgs() << "CSE LEAs: Candidate to replace: "; LI1.dump();); + MachineInstr *NewMI = + BuildMI(*(const_cast(&MBB)), + &LI1,DL,TII->get(LI1.getOpcode())) + .addDef(LI1.getOperand(0).getReg()) // Dst = Dst of LI1. + .addUse(LI2.getOperand(0).getReg()) // Base = Dst of LI2. + .addImm(Factor) // Scale = Diff b/w scales. + .addUse(LI1.getOperand(3).getReg()) // Index = Index of LI1. + .addImm(0) // Disp = 0 + .addUse(LI1.getOperand(5).getReg()); // Segment = Segmant of LI1. + + LI1.eraseFromParent(); + cseDone = NewMI != nullptr; + DEBUG(dbgs() << "CSE LEAs: Replaced by: "; NewMI->dump();); + } + } + } + return cseDone; +} + + bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; @@ -655,12 +753,16 @@ MRI = &MF.getRegInfo(); TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); + DT = &getAnalysis(); // Process all basic blocks. for (auto &MBB : MF) { MemOpMap LEAs; InstrPos.clear(); + // Attempt CSE over LEAs. + Changed |= cseLEAs(MBB); + // Find all LEA instructions in basic block. findLEAs(MBB, LEAs); Index: test/CodeGen/X86/lea-opt-cst.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/lea-opt-cst.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X86 + +%struct.SA = type { i32 , i32 , i32 , i32 ,i32 } + +define void @test_func(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr { +; X64-LABEL: test_func: +; X64: # BB#0: # %entry +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: movl 16(%rdi), %ecx +; X64-NEXT: leal 1(%rax,%rcx), %eax +; X64-NEXT: movl %eax, 12(%rdi) +; X64-NEXT: leal (%eax,%rcx), %eax +; X64-NEXT: movl %eax, 16(%rdi) +; X64-NEXT: retq +; +; X86-LABEL: test_func: +; X86: # BB#0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl (%eax), %ecx +; X86-NEXT: movl 16(%eax), %edx +; X86-NEXT: leal 1(%ecx,%edx), %ecx +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: leal (%ecx,%edx), %ecx +; X86-NEXT: movl %ecx, 16(%eax) +; X86-NEXT: retl + entry: + %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0 + %0 = load i32, i32* %h0, align 8 + %h3 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 3 + %h4 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 4 + %1 = load i32, i32* %h4, align 8 + %add = add i32 %0, 1 + %add4 = add i32 %add, %1 + store i32 %add4, i32* %h3, align 4 + %add29 = add i32 %add4 , %1 + store i32 %add29, i32* %h4, align 8 + ret void +} Index: test/CodeGen/X86/mul-constant-i16.ll =================================================================== --- test/CodeGen/X86/mul-constant-i16.ll +++ test/CodeGen/X86/mul-constant-i16.ll @@ -247,10 +247,9 @@ define i16 @test_mul_by_14(i16 %x) { ; X86-LABEL: test_mul_by_14: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal (%ecx,%ecx,2), %eax -; X86-NEXT: leal (%ecx,%eax,4), %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%ecx,%eax,2), %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; @@ -258,8 +257,7 @@ ; X64: # BB#0: ; X64-NEXT: # kill: %EDI %EDI %RDI ; X64-NEXT: leal (%rdi,%rdi,2), %eax -; X64-NEXT: leal (%rdi,%rax,4), %eax -; X64-NEXT: addl %edi, %eax +; X64-NEXT: leal (%rax,%rdi,2), %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 14 @@ -410,10 +408,9 @@ define i16 @test_mul_by_22(i16 %x) { ; X86-LABEL: test_mul_by_22: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal (%ecx,%ecx,4), %eax -; X86-NEXT: leal (%ecx,%eax,4), %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%ecx,%eax,2), %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; @@ -421,8 +418,7 @@ ; X64: # BB#0: ; X64-NEXT: # kill: %EDI %EDI %RDI ; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: leal (%rdi,%rax,4), %eax -; X64-NEXT: addl %edi, %eax +; X64-NEXT: leal (%rax,%rdi,2), %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 22 @@ -558,11 +554,10 @@ define i16 @test_mul_by_29(i16 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal (%ecx,%ecx,8), %eax -; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: leal (%ecx,%eax,2), %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; @@ -571,8 +566,7 @@ ; X64-NEXT: # kill: %EDI %EDI %RDI ; X64-NEXT: leal (%rdi,%rdi,8), %eax ; X64-NEXT: leal (%rax,%rax,2), %eax -; X64-NEXT: addl %edi, %eax -; X64-NEXT: addl %edi, %eax +; X64-NEXT: leal (%rax,%rdi,2), %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 29 Index: test/CodeGen/X86/mul-constant-i32.ll =================================================================== --- test/CodeGen/X86/mul-constant-i32.ll +++ test/CodeGen/X86/mul-constant-i32.ll @@ -669,26 +669,23 @@ define i32 @test_mul_by_14(i32 %x) { ; X86-LABEL: test_mul_by_14: ; X86: # BB#0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal (%ecx,%ecx,2), %eax -; X86-NEXT: leal (%ecx,%eax,4), %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%ecx,%eax,2), %eax ; X86-NEXT: retl ; ; X64-HSW-LABEL: test_mul_by_14: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_14: ; X64-JAG: # BB#0: ; X64-JAG-NEXT: # kill: %EDI %EDI %RDI ; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] -; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_14: @@ -1089,26 +1086,23 @@ define i32 @test_mul_by_22(i32 %x) { ; X86-LABEL: test_mul_by_22: ; X86: # BB#0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal (%ecx,%ecx,4), %eax -; X86-NEXT: leal (%ecx,%eax,4), %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%ecx,%eax,2), %eax ; X86-NEXT: retl ; ; X64-HSW-LABEL: test_mul_by_22: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_22: ; X64-JAG: # BB#0: ; X64-JAG-NEXT: # kill: %EDI %EDI %RDI ; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] -; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_22: @@ -1457,11 +1451,10 @@ define i32 @test_mul_by_29(i32 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # BB#0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal (%ecx,%ecx,8), %eax -; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: leal (%ecx,%eax,2), %eax ; X86-NEXT: retl ; ; X64-HSW-LABEL: test_mul_by_29: @@ -1469,8 +1462,7 @@ ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_29: @@ -1478,8 +1470,7 @@ ; X64-JAG-NEXT: # kill: %EDI %EDI %RDI ; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] -; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] -; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_29: Index: test/CodeGen/X86/mul-constant-i64.ll =================================================================== --- test/CodeGen/X86/mul-constant-i64.ll +++ test/CodeGen/X86/mul-constant-i64.ll @@ -684,8 +684,7 @@ ; X86: # BB#0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: leal (%eax,%eax,2), %ecx -; X86-NEXT: leal (%eax,%ecx,4), %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: leal (%ecx,%eax,2), %ecx ; X86-NEXT: movl $14, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %ecx, %edx @@ -694,15 +693,13 @@ ; X64-HSW-LABEL: test_mul_by_14: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_14: ; X64-JAG: # BB#0: ; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] -; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_14: @@ -1130,8 +1127,7 @@ ; X86: # BB#0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: leal (%eax,%eax,4), %ecx -; X86-NEXT: leal (%eax,%ecx,4), %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: leal (%ecx,%eax,2), %ecx ; X86-NEXT: movl $22, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %ecx, %edx @@ -1140,15 +1136,13 @@ ; X64-HSW-LABEL: test_mul_by_22: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_22: ; X64-JAG: # BB#0: ; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] -; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_22: @@ -1523,8 +1517,7 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: leal (%eax,%eax,8), %ecx ; X86-NEXT: leal (%ecx,%ecx,2), %ecx -; X86-NEXT: addl %eax, %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: leal (%ecx,%eax,2), %ecx ; X86-NEXT: movl $29, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %ecx, %edx @@ -1534,16 +1527,14 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_29: ; X64-JAG: # BB#0: ; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] -; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] -; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_29: Index: test/CodeGen/X86/mul-constant-result.ll =================================================================== --- test/CodeGen/X86/mul-constant-result.ll +++ test/CodeGen/X86/mul-constant-result.ll @@ -27,16 +27,16 @@ ; X86-NEXT: .LBB0_4: ; X86-NEXT: decl %ecx ; X86-NEXT: cmpl $31, %ecx -; X86-NEXT: ja .LBB0_39 +; X86-NEXT: ja .LBB0_38 ; X86-NEXT: # BB#5: ; X86-NEXT: jmpl *.LJTI0_0(,%ecx,4) ; X86-NEXT: .LBB0_6: ; X86-NEXT: addl %eax, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl -; X86-NEXT: .LBB0_39: +; X86-NEXT: .LBB0_38: ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: .LBB0_40: +; X86-NEXT: .LBB0_39: ; X86-NEXT: popl %esi ; X86-NEXT: retl ; X86-NEXT: .LBB0_7: @@ -89,90 +89,89 @@ ; X86-NEXT: retl ; X86-NEXT: .LBB0_19: ; X86-NEXT: leal (%eax,%eax,2), %ecx -; X86-NEXT: jmp .LBB0_20 -; X86-NEXT: .LBB0_21: +; X86-NEXT: leal (%ecx,%eax,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_20: ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl -; X86-NEXT: .LBB0_22: +; X86-NEXT: .LBB0_21: ; X86-NEXT: shll $4, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl -; X86-NEXT: .LBB0_23: +; X86-NEXT: .LBB0_22: ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shll $4, %ecx ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl -; X86-NEXT: .LBB0_24: +; X86-NEXT: .LBB0_23: ; X86-NEXT: addl %eax, %eax ; X86-NEXT: leal (%eax,%eax,8), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl -; X86-NEXT: .LBB0_25: +; X86-NEXT: .LBB0_24: ; X86-NEXT: leal (%eax,%eax,4), %ecx ; X86-NEXT: shll $2, %ecx ; X86-NEXT: jmp .LBB0_12 -; X86-NEXT: .LBB0_26: +; X86-NEXT: .LBB0_25: ; X86-NEXT: shll $2, %eax ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl -; X86-NEXT: .LBB0_27: +; X86-NEXT: .LBB0_26: ; X86-NEXT: leal (%eax,%eax,4), %ecx ; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl -; X86-NEXT: .LBB0_28: +; X86-NEXT: .LBB0_27: ; X86-NEXT: leal (%eax,%eax,4), %ecx -; X86-NEXT: .LBB0_20: -; X86-NEXT: leal (%eax,%ecx,4), %ecx -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: leal (%ecx,%eax,2), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl -; X86-NEXT: .LBB0_29: +; X86-NEXT: .LBB0_28: ; X86-NEXT: leal (%eax,%eax,2), %ecx ; X86-NEXT: shll $3, %ecx ; X86-NEXT: jmp .LBB0_12 -; X86-NEXT: .LBB0_30: +; X86-NEXT: .LBB0_29: ; X86-NEXT: shll $3, %eax ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl -; X86-NEXT: .LBB0_31: +; X86-NEXT: .LBB0_30: ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl -; X86-NEXT: .LBB0_32: +; X86-NEXT: .LBB0_31: ; X86-NEXT: leal (%eax,%eax,8), %ecx ; X86-NEXT: leal (%ecx,%ecx,2), %ecx ; X86-NEXT: jmp .LBB0_12 -; X86-NEXT: .LBB0_33: +; X86-NEXT: .LBB0_32: ; X86-NEXT: leal (%eax,%eax,8), %eax ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl +; X86-NEXT: .LBB0_33: +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl ; X86-NEXT: .LBB0_34: ; X86-NEXT: leal (%eax,%eax,8), %ecx ; X86-NEXT: leal (%ecx,%ecx,2), %ecx -; X86-NEXT: addl %ecx, %eax +; X86-NEXT: leal (%ecx,%eax,2), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl ; X86-NEXT: .LBB0_35: -; X86-NEXT: leal (%eax,%eax,8), %ecx -; X86-NEXT: leal (%ecx,%ecx,2), %ecx -; X86-NEXT: addl %eax, %ecx -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl -; X86-NEXT: .LBB0_36: ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shll $5, %ecx ; X86-NEXT: subl %eax, %ecx ; X86-NEXT: jmp .LBB0_12 -; X86-NEXT: .LBB0_37: +; X86-NEXT: .LBB0_36: ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shll $5, %ecx ; X86-NEXT: .LBB0_12: @@ -180,7 +179,7 @@ ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl -; X86-NEXT: .LBB0_38: +; X86-NEXT: .LBB0_37: ; X86-NEXT: shll $5, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -196,16 +195,16 @@ ; X64-HSW-NEXT: cmovel %ecx, %eax ; X64-HSW-NEXT: addl $-1, %edi ; X64-HSW-NEXT: cmpl $31, %edi -; X64-HSW-NEXT: ja .LBB0_36 +; X64-HSW-NEXT: ja .LBB0_35 ; X64-HSW-NEXT: # BB#1: ; X64-HSW-NEXT: jmpq *.LJTI0_0(,%rdi,8) ; X64-HSW-NEXT: .LBB0_2: ; X64-HSW-NEXT: addl %eax, %eax ; X64-HSW-NEXT: # kill: %EAX %EAX %RAX ; X64-HSW-NEXT: retq -; X64-HSW-NEXT: .LBB0_36: +; X64-HSW-NEXT: .LBB0_35: ; X64-HSW-NEXT: xorl %eax, %eax -; X64-HSW-NEXT: .LBB0_37: +; X64-HSW-NEXT: .LBB0_36: ; X64-HSW-NEXT: # kill: %EAX %EAX %RAX ; X64-HSW-NEXT: retq ; X64-HSW-NEXT: .LBB0_3: @@ -258,86 +257,89 @@ ; X64-HSW-NEXT: retq ; X64-HSW-NEXT: .LBB0_15: ; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx -; X64-HSW-NEXT: jmp .LBB0_16 -; X64-HSW-NEXT: .LBB0_18: +; X64-HSW-NEXT: leal (%rcx,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_16: ; X64-HSW-NEXT: leal (%rax,%rax,4), %eax ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax ; X64-HSW-NEXT: # kill: %EAX %EAX %RAX ; X64-HSW-NEXT: retq -; X64-HSW-NEXT: .LBB0_19: +; X64-HSW-NEXT: .LBB0_17: ; X64-HSW-NEXT: shll $4, %eax ; X64-HSW-NEXT: # kill: %EAX %EAX %RAX ; X64-HSW-NEXT: retq -; X64-HSW-NEXT: .LBB0_20: +; X64-HSW-NEXT: .LBB0_18: ; X64-HSW-NEXT: movl %eax, %ecx ; X64-HSW-NEXT: shll $4, %ecx -; X64-HSW-NEXT: jmp .LBB0_17 -; X64-HSW-NEXT: .LBB0_21: +; X64-HSW-NEXT: jmp .LBB0_19 +; X64-HSW-NEXT: .LBB0_20: ; X64-HSW-NEXT: addl %eax, %eax ; X64-HSW-NEXT: leal (%rax,%rax,8), %eax ; X64-HSW-NEXT: # kill: %EAX %EAX %RAX ; X64-HSW-NEXT: retq -; X64-HSW-NEXT: .LBB0_22: +; X64-HSW-NEXT: .LBB0_21: ; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx ; X64-HSW-NEXT: shll $2, %ecx ; X64-HSW-NEXT: jmp .LBB0_8 -; X64-HSW-NEXT: .LBB0_23: +; X64-HSW-NEXT: .LBB0_22: ; X64-HSW-NEXT: shll $2, %eax ; X64-HSW-NEXT: leal (%rax,%rax,4), %eax ; X64-HSW-NEXT: # kill: %EAX %EAX %RAX ; X64-HSW-NEXT: retq -; X64-HSW-NEXT: .LBB0_24: +; X64-HSW-NEXT: .LBB0_23: ; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx ; X64-HSW-NEXT: leal (%rax,%rcx,4), %eax ; X64-HSW-NEXT: # kill: %EAX %EAX %RAX ; X64-HSW-NEXT: retq -; X64-HSW-NEXT: .LBB0_25: +; X64-HSW-NEXT: .LBB0_24: ; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx -; X64-HSW-NEXT: .LBB0_16: -; X64-HSW-NEXT: leal (%rax,%rcx,4), %ecx -; X64-HSW-NEXT: jmp .LBB0_17 -; X64-HSW-NEXT: .LBB0_26: +; X64-HSW-NEXT: leal (%rcx,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_25: ; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx ; X64-HSW-NEXT: shll $3, %ecx ; X64-HSW-NEXT: jmp .LBB0_8 -; X64-HSW-NEXT: .LBB0_27: +; X64-HSW-NEXT: .LBB0_26: ; X64-HSW-NEXT: shll $3, %eax ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax ; X64-HSW-NEXT: # kill: %EAX %EAX %RAX ; X64-HSW-NEXT: retq -; X64-HSW-NEXT: .LBB0_28: +; X64-HSW-NEXT: .LBB0_27: ; X64-HSW-NEXT: leal (%rax,%rax,4), %eax ; X64-HSW-NEXT: leal (%rax,%rax,4), %eax ; X64-HSW-NEXT: # kill: %EAX %EAX %RAX ; X64-HSW-NEXT: retq -; X64-HSW-NEXT: .LBB0_29: +; X64-HSW-NEXT: .LBB0_28: ; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx ; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx ; X64-HSW-NEXT: jmp .LBB0_8 -; X64-HSW-NEXT: .LBB0_30: +; X64-HSW-NEXT: .LBB0_29: ; X64-HSW-NEXT: leal (%rax,%rax,8), %eax ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax ; X64-HSW-NEXT: # kill: %EAX %EAX %RAX ; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_30: +; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx +; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx +; X64-HSW-NEXT: .LBB0_19: +; X64-HSW-NEXT: addl %eax, %ecx +; X64-HSW-NEXT: movl %ecx, %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq ; X64-HSW-NEXT: .LBB0_31: ; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx ; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx -; X64-HSW-NEXT: jmp .LBB0_17 -; X64-HSW-NEXT: .LBB0_32: -; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx -; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx -; X64-HSW-NEXT: addl %eax, %ecx -; X64-HSW-NEXT: .LBB0_17: -; X64-HSW-NEXT: addl %eax, %ecx -; X64-HSW-NEXT: movl %ecx, %eax +; X64-HSW-NEXT: leal (%rcx,%rax,2), %eax ; X64-HSW-NEXT: # kill: %EAX %EAX %RAX ; X64-HSW-NEXT: retq -; X64-HSW-NEXT: .LBB0_33: +; X64-HSW-NEXT: .LBB0_32: ; X64-HSW-NEXT: movl %eax, %ecx ; X64-HSW-NEXT: shll $5, %ecx ; X64-HSW-NEXT: subl %eax, %ecx ; X64-HSW-NEXT: jmp .LBB0_8 -; X64-HSW-NEXT: .LBB0_34: +; X64-HSW-NEXT: .LBB0_33: ; X64-HSW-NEXT: movl %eax, %ecx ; X64-HSW-NEXT: shll $5, %ecx ; X64-HSW-NEXT: .LBB0_8: @@ -345,7 +347,7 @@ ; X64-HSW-NEXT: movl %ecx, %eax ; X64-HSW-NEXT: # kill: %EAX %EAX %RAX ; X64-HSW-NEXT: retq -; X64-HSW-NEXT: .LBB0_35: +; X64-HSW-NEXT: .LBB0_34: ; X64-HSW-NEXT: shll $5, %eax ; X64-HSW-NEXT: # kill: %EAX %EAX %RAX ; X64-HSW-NEXT: retq Index: test/CodeGen/X86/umul-with-overflow.ll =================================================================== --- test/CodeGen/X86/umul-with-overflow.ll +++ test/CodeGen/X86/umul-with-overflow.ll @@ -1,37 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b) define zeroext i1 @a(i32 %x) nounwind { +; CHECK-LABEL: a: +; CHECK: # BB#0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl $3, %ecx +; CHECK-NEXT: mull %ecx +; CHECK-NEXT: seto %al +; CHECK-NEXT: retl %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3) %obil = extractvalue {i32, i1} %res, 1 ret i1 %obil - -; CHECK-LABEL: a: -; CHECK: mull -; CHECK: seto %al -; CHECK: ret + } define i32 @test2(i32 %a, i32 %b) nounwind readnone { +; CHECK-LABEL: test2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: leal (%ecx,%eax,2), %eax +; CHECK-NEXT: retl entry: %tmp0 = add i32 %b, %a %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2) %tmp2 = extractvalue { i32, i1 } %tmp1, 0 ret i32 %tmp2 -; CHECK-LABEL: test2: -; CHECK: addl -; CHECK-NEXT: addl -; CHECK-NEXT: ret } define i32 @test3(i32 %a, i32 %b) nounwind readnone { +; CHECK-LABEL: test3: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl $4, %ecx +; CHECK-NEXT: mull %ecx +; CHECK-NEXT: retl entry: %tmp0 = add i32 %b, %a %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4) %tmp2 = extractvalue { i32, i1 } %tmp1, 0 ret i32 %tmp2 -; CHECK-LABEL: test3: -; CHECK: addl -; CHECK: mull -; CHECK-NEXT: ret } Index: test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll =================================================================== --- test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll +++ test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll @@ -13,14 +13,14 @@ ; X64-NEXT: .p2align ; X64: %loop ; no complex address modes -; X64-NOT: (%{{[^)]+}},%{{[^)]+}}, +; X64-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}}, ; ; X32: @simple ; no expensive address computation in the preheader ; X32-NOT: imul ; X32: %loop ; no complex address modes -; X32-NOT: (%{{[^)]+}},%{{[^)]+}}, +; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}}, define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind { entry: br label %loop @@ -103,7 +103,7 @@ ; X32-NOT: mov{{.*}}(%esp){{$}} ; X32: %for.body{{$}} ; no complex address modes -; X32-NOT: (%{{[^)]+}},%{{[^)]+}}, +; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}}, ; no reloads ; X32-NOT: (%esp) define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind { Index: utils/TableGen/DAGISelMatcherGen.cpp =================================================================== --- utils/TableGen/DAGISelMatcherGen.cpp +++ utils/TableGen/DAGISelMatcherGen.cpp @@ -305,7 +305,7 @@ const SDNodeInfo &CInfo = CGP.getSDNodeInfo(N->getOperator()); // If this is an 'and R, 1234' where the operation is AND/OR and the RHS is - // a constant without a predicate fn that has more that one bit set, handle + // a constant without a predicate fn that has more than one bit set, handle // this as a special case. This is usually for targets that have special // handling of certain large constants (e.g. alpha with it's 8/16/32-bit // handling stuff). Using these instructions is often far more efficient