Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -2827,6 +2827,9 @@
     // TargetLowering::LowerCall that perform tail call conversions.
     bool IsTailCall = false;
 
+    // True if the call is being lowered after SelectionDAG type legalization.
+    bool IsPostTypeLegalization = false;
+
     unsigned NumFixedArgs = -1;
     CallingConv::ID CallConv = CallingConv::C;
     SDValue Callee;
@@ -2838,7 +2841,7 @@
     SmallVector<SDValue, 32> OutVals;
     SmallVector<ISD::InputArg, 32> Ins;
     SmallVector<SDValue, 4> InVals;
-
+
     CallLoweringInfo(SelectionDAG &DAG)
         : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
           DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
@@ -2949,6 +2952,11 @@
       return *this;
     }
 
+    CallLoweringInfo &setIsPostTypeLegalization(bool Value = true) {
+      IsPostTypeLegalization = Value;
+      return *this;
+    }
+
     ArgListTy &getArgs() {
       return Args;
     }
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1991,7 +1991,8 @@
                     std::move(Args))
       .setTailCall(isTailCall)
       .setSExtResult(isSigned)
-      .setZExtResult(!isSigned);
+      .setZExtResult(!isSigned)
+      .setIsPostTypeLegalization(true);
 
   std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2029,7 +2030,8 @@
       .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
                     std::move(Args))
       .setSExtResult(isSigned)
-      .setZExtResult(!isSigned);
+      .setZExtResult(!isSigned)
+      .setIsPostTypeLegalization(true);
 
   std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -3565,16 +3567,11 @@
     SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
     Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
   }
-  BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
-                           DAG.getIntPtrConstant(0, dl));
-  TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
-                        DAG.getIntPtrConstant(1, dl));
-  // Ret is a node with an illegal type. Because such things are not
-  // generally permitted during this phase of legalization, make sure the
-  // node has no more uses. The above EXTRACT_ELEMENT nodes should have been
-  // folded.
-  assert(Ret->use_empty() &&
-         "Unexpected uses of illegally type from expanded lib call.");
+  assert((Ret.getOpcode() == ISD::BUILD_PAIR ||
+          Ret.getOpcode() == ISD::MERGE_VALUES) &&
+         "Ret value is a collection of constituent nodes holding result.");
+  BottomHalf = Ret.getOperand(0);
+  TopHalf = Ret.getOperand(1);
 }
 
 if (isSigned) {
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8085,6 +8085,9 @@
   SmallVector<SDValue, 4> ReturnValues;
   if (!CanLowerReturn) {
+    unsigned NumValues;
+    SmallVector<SDValue, 4> Chains;
+
     // The instruction result is the result of loading from the
     // hidden sret parameter.
     SmallVector<EVT, 1> PVTs;
@@ -8092,11 +8095,37 @@
     ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
     assert(PVTs.size() == 1 && "Pointers should fit in one register");
 
-    EVT PtrVT = PVTs[0];
+    EVT PartPtrVT = PVTs[0];
 
-    unsigned NumValues = RetTys.size();
+    // During node legalization, the expansion of a larger integer result
+    // into its legal constituent parts is done as part of call lowering,
+    // whereas during type legalization it is handled after call lowering.
+    if (!CLI.IsPostTypeLegalization) {
+      NumValues = RetTys.size();
+    } else {
+      LLVMContext &LC = OrigRetTy->getContext();
+      EVT OrigRetVT = getValueType(DL, OrigRetTy);
+      MVT PartVT = getRegisterType(LC, OrigRetVT);
+      PartPtrVT = getValueType(DL,
+          PointerType::getUnqual(EVT(PartVT).getTypeForEVT(LC)));
+
+      NumValues = getNumRegisters(LC, OrigRetVT);
+      unsigned ByteOffset = (PartVT.getSizeInBits() + 7) / 8;
+      Offsets.resize(NumValues);
+      for (unsigned I = 0; I != NumValues; ++I)
+        Offsets[I] = I * ByteOffset;
+
+      RetTys.assign(NumValues, EVT(PartVT));
+    }
     ReturnValues.resize(NumValues);
-    SmallVector<SDValue, 4> Chains(NumValues);
+    Chains.resize(NumValues);
 
     // An aggregate return value cannot wrap around the address space, so
     // offsets to its parts don't wrap either.
@@ -8104,9 +8133,9 @@
     SDNodeFlags Flags;
     Flags.setNoUnsignedWrap(true);
 
     for (unsigned i = 0; i < NumValues; ++i) {
-      SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
+      SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PartPtrVT, DemoteStackSlot,
                                     CLI.DAG.getConstant(Offsets[i], CLI.DL,
-                                                        PtrVT), Flags);
+                                                        PartPtrVT), Flags);
       SDValue L = CLI.DAG.getLoad(
           RetTys[i], CLI.DL, CLI.Chain, Add,
           MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1410,6 +1410,19 @@
     return false;
   }
 
+  if (AM.Scale == 1) {
+    if (AM.Base_Reg == N) {
+      SDValue Base_Reg = AM.Base_Reg;
+      AM.Base_Reg = AM.IndexReg;
+      AM.IndexReg = Base_Reg;
+      AM.Scale++;
+      return false;
+    } else if (AM.IndexReg == N) {
+      AM.Scale++;
+      return false;
+    }
+  }
+
   // Otherwise, we cannot select it.
   return true;
 }
Index: lib/Target/X86/X86OptimizeLEAs.cpp
===================================================================
--- lib/Target/X86/X86OptimizeLEAs.cpp
+++ lib/Target/X86/X86OptimizeLEAs.cpp
@@ -26,6 +26,7 @@
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DebugInfoMetadata.h"
@@ -65,8 +66,8 @@
 public:
   MemOpKey(const MachineOperand *Base, const MachineOperand *Scale,
            const MachineOperand *Index, const MachineOperand *Segment,
-           const MachineOperand *Disp)
-      : Disp(Disp) {
+           const MachineOperand *Disp, bool DispCheck = false)
+      : Disp(Disp), HardDispCheck(DispCheck) {
     Operands[0] = Base;
     Operands[1] = Scale;
     Operands[2] = Index;
@@ -83,7 +84,10 @@
     // matters that they use the same symbol/index/address. Immediates' or
     // offsets' differences will be taken care of during instruction
     // substitution.
-    return isSimilarDispOp(*Disp, *Other.Disp);
+    if (HardDispCheck)
+      return isIdenticalOp(*Disp, *Other.Disp);
+    else
+      return isSimilarDispOp(*Disp, *Other.Disp);
   }
 
   // Address' base, scale, index and segment operands.
@@ -91,6 +95,9 @@
 
   // Address' displacement operand.
   const MachineOperand *Disp;
+
+  // If true, displacements must be identical rather than merely similar.
+  bool HardDispCheck;
 };
 
 } // end anonymous namespace
@@ -178,6 +185,18 @@
           &MI.getOperand(N + X86::AddrDisp));
 }
 
+static inline MemOpKey getMemOpCSEKey(const MachineInstr &MI, unsigned N) {
+  static MachineOperand DummyScale = MachineOperand::CreateImm(1);
+  assert((isLEA(MI) || MI.mayLoadOrStore()) &&
+         "The instruction must be a LEA, a load or a store");
+  // Use a fixed scale of 1 so that LEAs which differ only in scale
+  // fall into the same bucket.
+  return MemOpKey(&MI.getOperand(N + X86::AddrBaseReg), &DummyScale,
+                  &MI.getOperand(N + X86::AddrIndexReg),
+                  &MI.getOperand(N + X86::AddrSegmentReg),
+                  &MI.getOperand(N + X86::AddrDisp), /*DispCheck=*/true);
+}
+
 static inline bool isIdenticalOp(const MachineOperand &MO1,
                                  const MachineOperand &MO2) {
   return MO1.isIdenticalTo(MO2) &&
@@ -228,6 +247,12 @@
   /// been calculated by LEA. Also, remove redundant LEAs.
   bool runOnMachineFunction(MachineFunction &MF) override;
 
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineDominatorTree>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
 private:
   typedef DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>> MemOpMap;
@@ -261,6 +286,13 @@
   /// distance between them.
   void findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs);
 
+  /// \brief Find all LEA instructions in the basic block that have the same
+  /// Base, Index, Disp and Segment.
+  void populateCSEMap(const MachineBasicBlock &MBB, MemOpMap &LEAs);
+
+  /// \brief Factor LEAs that differ only in scale through a dominating LEA.
+  bool cseLEAs(const MachineBasicBlock &MBB);
+
   /// \brief Removes redundant address calculations.
   bool removeRedundantAddrCalc(MemOpMap &LEAs);
@@ -276,6 +308,7 @@
   DenseMap<const MachineInstr *, unsigned> InstrPos;
 
   MachineRegisterInfo *MRI;
+  MachineDominatorTree *DT;
   const X86InstrInfo *TII;
   const X86RegisterInfo *TRI;
@@ -462,6 +495,14 @@
   }
 }
 
+void OptimizeLEAPass::populateCSEMap(const MachineBasicBlock &MBB,
+                                     MemOpMap &LEAs) {
+  for (auto &MI : MBB) {
+    if (isLEA(MI))
+      LEAs[getMemOpCSEKey(MI, 1)].push_back(const_cast<MachineInstr *>(&MI));
+  }
+}
+
 // Try to find load and store instructions which recalculate addresses already
 // calculated by some LEA and replace their memory operands with its def
 // register.
@@ -646,6 +687,59 @@
   return Changed;
 }
 
+bool OptimizeLEAPass::cseLEAs(const MachineBasicBlock &MBB) {
+  MemOpMap LEAs;
+  bool cseDone = false;
+
+  populateCSEMap(MBB, LEAs);
+
+  // Sort each bucket by decreasing scale. Note the strict comparison so
+  // that std::sort gets a valid strict weak ordering.
+  auto CompareFn = [](const MachineInstr *Arg1,
+                      const MachineInstr *Arg2) -> bool {
+    return Arg1->getOperand(2).getImm() > Arg2->getOperand(2).getImm();
+  };
+
+  // Loop over all entries in the table.
+  for (auto &E : LEAs) {
+    auto &List = E.second;
+    if (List.size() > 1)
+      std::sort(List.begin(), List.end(), CompareFn);
+
+    // Loop over all adjacent LEA pairs.
+    for (auto LII = List.begin(); LII != List.end(); ++LII) {
+      MachineInstr &LI1 = **LII;
+      auto LINext = std::next(LII);
+      if (LINext == List.end())
+        break;
+      MachineInstr &LI2 = **LINext;
+      if (!DT->dominates(&LI2, &LI1))
+        continue;
+
+      int Scale1 = LI1.getOperand(2).getImm();
+      int Scale2 = LI2.getOperand(2).getImm();
+      assert(LI2.getOperand(0).isReg() && "Result is a VirtualReg");
+      DebugLoc DL = LI1.getDebugLoc();
+
+      if ((Scale1 - Scale2) > 0) {
+        MachineInstr *NewMI =
+            BuildMI(*(const_cast<MachineBasicBlock *>(&MBB)), &LI1, DL,
+                    TII->get(LI1.getOpcode()))
+                .addDef(LI1.getOperand(0).getReg())  // Dst     = Dst of LI1.
+                .addUse(LI2.getOperand(0).getReg())  // Base    = Dst of LI2.
+                .addImm(Scale1 - Scale2)             // Scale   = Scale diff.
+                .addUse(LI1.getOperand(3).getReg())  // Index   = Index of LI1.
+                .addImm(0)                           // Disp    = 0.
+                .addUse(LI1.getOperand(5).getReg()); // Segment = Segment of LI1.
+
+        LI1.eraseFromParent();
+        cseDone = NewMI != nullptr;
+      }
+    }
+  }
+  return cseDone;
+}
+
 bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
 
@@ -655,12 +749,16 @@
   MRI = &MF.getRegInfo();
   TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
   TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
+  DT = &getAnalysis<MachineDominatorTree>();
 
   // Process all basic blocks.
   for (auto &MBB : MF) {
     MemOpMap LEAs;
     InstrPos.clear();
 
+    // Attempt CSE over LEAs.
+    Changed |= cseLEAs(MBB);
+
     // Find all LEA instructions in basic block.
     findLEAs(MBB, LEAs);
 
Index: test/CodeGen/WebAssembly/umulo-i64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/WebAssembly/umulo-i64.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+; Test that UMULO works correctly on 64-bit operands.
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-emscripten"
+
+; CHECK-LABEL: _ZN4core3num21_$LT$impl$u20$u64$GT$15overflowing_mul17h07be88b4cbac028fE:
+; CHECK: __multi3
+; Function Attrs: inlinehint
+define void @"_ZN4core3num21_$LT$impl$u20$u64$GT$15overflowing_mul17h07be88b4cbac028fE"(i64, i64) unnamed_addr #0 {
+start:
+  %2 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %0, i64 %1)
+  %3 = extractvalue { i64, i1 } %2, 0
+  store i64 %3, i64* undef
+  unreachable
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #1
+
+attributes #0 = { inlinehint }
+attributes #1 = { nounwind readnone speculatable }
Index: test/CodeGen/X86/lea-opt-cse.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/lea-opt-cse.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X86
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.SA = type { i32, i32, i32, i32, i32 }
+
+define void @test_func(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr {
+; X86-LABEL: test_func:
+; X86:       # BB#0:
+; X86-NEXT:    movl (%rdi), %eax
+; X86-NEXT:    movl 16(%rdi), %ecx
+; X86-NEXT:    leal 1(%rax,%rcx), %eax
+; X86-NEXT:    movl %eax, 12(%rdi)
+; X86-NEXT:    leal (%rax,%rcx), %eax
+; X86-NEXT:    movl %eax, 16(%rdi)
+; X86-NEXT:    retq
+entry:
+  %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
+  %0 = load i32, i32* %h0, align 8
+  %h3 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 3
+  %h4 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 4
+  %1 = load i32, i32* %h4, align 8
+  %add = add i32 %0, 1
+  %add4 = add i32 %add, %1
+  store i32 %add4, i32* %h3, align 4
+  %add29 = add i32 %add4, %1
+  store i32 %add29, i32* %h4, align 8
+  ret void
+}
Index: test/CodeGen/X86/mul-constant-i16.ll
===================================================================
--- test/CodeGen/X86/mul-constant-i16.ll
+++ test/CodeGen/X86/mul-constant-i16.ll
@@ -558,11 +558,10 @@
 define i16 @test_mul_by_29(i16 %x) {
 ; X86-LABEL: test_mul_by_29:
 ; X86:       # BB#0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    leal (%ecx,%ecx,8), %eax
-; X86-NEXT:    leal (%eax,%eax,2), %eax
-; X86-NEXT:    addl %ecx, %eax
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,8), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
+; X86-NEXT:    leal (%ecx,%eax,2), %eax
 ; X86-NEXT:    # kill: %AX %AX %EAX
 ; X86-NEXT:    retl
 ;
@@ -571,8 +570,7 @@
 ; X64-NEXT:    # kill: %EDI %EDI %RDI
 ; X64-NEXT:    leal (%rdi,%rdi,8), %eax
 ; X64-NEXT:    leal (%rax,%rax,2), %eax
-; X64-NEXT:    addl %edi, %eax
-; X64-NEXT:    addl %edi, %eax
+; X64-NEXT:    leal (%rax,%rdi,2), %eax
 ; X64-NEXT:    # kill: %AX %AX %EAX
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 29
Index: test/CodeGen/X86/mul-constant-i32.ll
===================================================================
--- test/CodeGen/X86/mul-constant-i32.ll
+++ test/CodeGen/X86/mul-constant-i32.ll
@@ -1457,30 +1457,27 @@
 define i32 @test_mul_by_29(i32 %x) {
 ; X86-LABEL: test_mul_by_29:
 ; X86:       # BB#0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    leal (%ecx,%ecx,8), %eax
-; X86-NEXT:    leal (%eax,%eax,2), %eax
-; X86-NEXT:    addl %ecx, %eax
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,8), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
+; X86-NEXT:    leal (%ecx,%eax,2), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-HSW-LABEL: test_mul_by_29:
 ; X64-HSW:       # BB#0:
 ; X64-HSW-NEXT:    # kill: %EDI %EDI %RDI
-; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    leal (%rax,%rdi,2), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT:    retq # sched: [1:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_29:
 ; X64-JAG:       # BB#0:
 ; X64-JAG-NEXT:    # kill: %EDI %EDI %RDI
-; X64-JAG-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; X64-JAG-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT:    retq # sched: [4:1.00]
+; X64-JAG-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rax,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: test_mul_by_29:
 ; X86-NOOPT:       # BB#0:
Index: test/CodeGen/X86/mul-constant-i64.ll
===================================================================
--- test/CodeGen/X86/mul-constant-i64.ll
+++ test/CodeGen/X86/mul-constant-i64.ll
@@ -1523,8 +1523,7 @@
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal (%eax,%eax,8), %ecx
 ; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
-; X86-NEXT:    addl %eax, %ecx
-; X86-NEXT:    addl %eax, %ecx
+; X86-NEXT:    leal (%ecx,%eax,2), %ecx
 ; X86-NEXT:    movl $29, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
 ; X86-NEXT:    addl %ecx, %edx
@@ -1532,19 +1531,17 @@
 ;
 ; X64-HSW-LABEL: test_mul_by_29:
 ; X64-HSW:       # BB#0:
-; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT:    retq # sched: [1:1.00]
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_29:
 ; X64-JAG:       # BB#0:
-; X64-JAG-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; X64-JAG-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT:    retq # sched: [4:1.00]
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: test_mul_by_29:
 ; X86-NOOPT:       # BB#0:
Index: test/CodeGen/X86/mul-constant-result.ll
===================================================================
--- test/CodeGen/X86/mul-constant-result.ll
+++ test/CodeGen/X86/mul-constant-result.ll
@@ -163,8 +163,7 @@
 ; X86-NEXT:  .LBB0_35:
 ; X86-NEXT:    leal (%eax,%eax,8), %ecx
 ; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
-; X86-NEXT:    addl %eax, %ecx
-; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    leal (%ecx,%eax,2), %eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_36:
@@ -322,16 +321,17 @@
 ; X64-HSW-NEXT:  .LBB0_31:
 ; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
 ; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT:    jmp .LBB0_17
-; X64-HSW-NEXT:  .LBB0_32:
-; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
-; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT:    addl %eax, %ecx
 ; X64-HSW-NEXT:  .LBB0_17:
 ; X64-HSW-NEXT:    addl %eax, %ecx
 ; X64-HSW-NEXT:    movl %ecx, %eax
 ; X64-HSW-NEXT:    # kill: %EAX %EAX %RAX
 ; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_32:
+; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT:    leal (%rcx,%rax,2), %eax
+; X64-HSW-NEXT:    # kill: %EAX %EAX %RAX
+; X64-HSW-NEXT:    retq
 ; X64-HSW-NEXT:  .LBB0_33:
 ; X64-HSW-NEXT:    movl %eax, %ecx
 ; X64-HSW-NEXT:    shll $5, %ecx
Index: test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
===================================================================
--- test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -13,14 +13,14 @@
 ; X64-NEXT: .p2align
 ; X64: %loop
 ; no complex address modes
-; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
+; X64-NOT: {{[1-9]+}}(%{{[^)]+}},%{{[^)]+}},
 ;
 ; X32: @simple
 ; no expensive address computation in the preheader
 ; X32-NOT: imul
 ; X32: %loop
 ; no complex address modes
-; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+; X32-NOT: {{[1-9]+}}(%{{[^)]+}},%{{[^)]+}},
 define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
 entry:
   br label %loop
@@ -103,7 +103,7 @@
 ; X32-NOT: mov{{.*}}(%esp){{$}}
 ; X32: %for.body{{$}}
 ; no complex address modes
-; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+; X32-NOT: {{[1-9]+}}(%{{[^)]+}},%{{[^)]+}},
 ; no reloads
 ; X32-NOT: (%esp)
 define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
Index: utils/TableGen/DAGISelMatcherGen.cpp
===================================================================
--- utils/TableGen/DAGISelMatcherGen.cpp
+++ utils/TableGen/DAGISelMatcherGen.cpp
@@ -305,7 +305,7 @@
   const SDNodeInfo &CInfo = CGP.getSDNodeInfo(N->getOperator());
 
   // If this is an 'and R, 1234' where the operation is AND/OR and the RHS is
-  // a constant without a predicate fn that has more that one bit set, handle
+  // a constant without a predicate fn that has more than one bit set, handle
   // this as a special case. This is usually for targets that have special
   // handling of certain large constants (e.g. alpha with it's 8/16/32-bit
   // handling stuff). Using these instructions is often far more efficient
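
For context on the SelectionDAGBuilder change: when a libcall is lowered after type
legalization, the demoted sret result must be split right here into parts of the
legal register type. Below is a minimal standalone sketch of that arithmetic; it is
plain C++, not the LLVM API, and the 128/64-bit widths are assumed values matching
the wasm32 __multi3 case from the new test.

#include <cassert>
#include <vector>

int main() {
  // Assumed: an illegal 128-bit return value and a legal 64-bit
  // register type, as for __multi3's i128 result on wasm32.
  unsigned OrigRetBits = 128;
  unsigned PartBits = 64;

  // Mirrors the NumValues/ByteOffset computation in the new path.
  unsigned NumValues = (OrigRetBits + PartBits - 1) / PartBits;
  unsigned ByteOffset = (PartBits + 7) / 8;

  // One load of the part type per legal part, at increasing offsets
  // into the demoted sret stack slot.
  std::vector<unsigned> Offsets(NumValues);
  for (unsigned I = 0; I != NumValues; ++I)
    Offsets[I] = I * ByteOffset;

  assert(NumValues == 2 && Offsets[0] == 0 && Offsets[1] == 8);
  return 0;
}

LowerCallTo then hands the parts back as a BUILD_PAIR/MERGE_VALUES, which is the
shape the new assert in the UMULO expansion checks for.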
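
A similar sketch for the new AM.Scale == 1 fold in X86ISelDAGToDAG (again plain C++
with made-up values; Base, Index and Scale stand in for the X86ISelAddressMode
fields): when the term being matched is already the base of a unit-scale address,
swapping it into the index slot and bumping the scale absorbs the second occurrence
instead of failing the match.

#include <cassert>

int main() {
  // AM currently represents Base + Index*Scale, with Base == N, Scale == 1.
  long N = 0x2000, OldIndex = 0x30;
  long Base = N, Index = OldIndex, Scale = 1;

  // Fold another addend N: swap base and index, bump the scale.
  long Tmp = Base;
  Base = Index;
  Index = Tmp;
  Scale++;

  // The rewritten AM absorbs the extra N exactly.
  assert(Base + Index * Scale == (N + OldIndex) + N);
  return 0;
}

This is consistent with the mul-by-29 test updates, where the two trailing
addl/addq instructions collapse into a single scale-2 LEA.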
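
Finally, the identity cseLEAs relies on, as a self-contained sketch with arbitrary
operand values: for two LEAs agreeing on base, index, displacement and segment, the
larger-scale LEA equals the dominating smaller-scale LEA plus the index times the
scale difference, so it can be rebuilt with the dominating LEA's result as its base
and a zero displacement.

#include <cassert>

int main() {
  // Shared operands of the two LEAs (values are arbitrary).
  long Base = 0x1000, Index = 8, Disp = 16;
  long Scale2 = 2, Scale1 = 4; // LI2 dominates LI1; Scale1 > Scale2.

  long LI2 = Base + Scale2 * Index + Disp; // kept as-is
  long LI1 = Base + Scale1 * Index + Disp; // rewritten by cseLEAs

  // NewMI: Base = LI2's result, Scale = Scale1 - Scale2, Disp = 0.
  long NewLI1 = LI2 + (Scale1 - Scale2) * Index;
  assert(NewLI1 == LI1);
  return 0;
}

This is the rewrite path the new lea-opt-cse.ll test is meant to pin down.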