Index: lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- lib/Analysis/ScalarEvolutionExpander.cpp +++ lib/Analysis/ScalarEvolutionExpander.cpp @@ -24,10 +24,12 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +using namespace PatternMatch; /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, /// reusing an existing cast if a suitable one exists, moving an existing @@ -751,25 +753,30 @@ // out of loops. Value *Prod = nullptr; for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator - I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ++I) { const SCEV *Op = I->second; if (!Prod) { // This is the first operand. Just expand it. Prod = expand(Op); - ++I; } else if (Op->isAllOnesValue()) { // Instead of doing a multiply by negative one, just do a negate. Prod = InsertNoopCastOfTo(Prod, Ty); Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod); - ++I; } else { // A simple mul. Value *W = expandCodeFor(Op, Ty); Prod = InsertNoopCastOfTo(Prod, Ty); // Canonicalize a constant to the RHS. 
if (isa<Constant>(Prod)) std::swap(Prod, W); - Prod = InsertBinop(Instruction::Mul, Prod, W); - ++I; + const APInt *RHS; + if (match(W, m_Power2(RHS))) { + // Canonicalize Prod*(1<<C) to Prod<<C. + assert(!Ty->isVectorTy() && "vector types are not SCEVable"); + Prod = InsertBinop(Instruction::Shl, Prod, + ConstantInt::get(Ty, RHS->logBase2())); + } else { + Prod = InsertBinop(Instruction::Mul, Prod, W); + } } } Index: test/Transforms/LoopIdiom/basic.ll =================================================================== --- test/Transforms/LoopIdiom/basic.ll +++ test/Transforms/LoopIdiom/basic.ll @@ -69,7 +69,7 @@ ret void ; CHECK-LABEL: @test2( ; CHECK: br i1 %cmp10, -; CHECK: %0 = mul i64 %Size, 4 +; CHECK: %0 = shl i64 %Size, 2 ; CHECK: call void @llvm.memset.p0i8.i64(i8* %Base1, i8 1, i64 %0, i32 4, i1 false) ; CHECK-NOT: store } Index: test/Transforms/LoopReroll/nonconst_lb.ll =================================================================== --- test/Transforms/LoopReroll/nonconst_lb.ll +++ test/Transforms/LoopReroll/nonconst_lb.ll @@ -52,7 +52,7 @@ ; CHECK: %0 = add i32 %n, -1 ; CHECK: %1 = sub i32 %0, %m ; CHECK: %2 = lshr i32 %1, 2 -; CHECK: %3 = mul i32 %2, 4 +; CHECK: %3 = shl i32 %2, 2 ; CHECK: %4 = add i32 %m, %3 ; CHECK: %5 = add i32 %4, 3 ; CHECK: br label %for.body @@ -132,7 +132,7 @@ ; CHECK: %0 = add i32 %n, -1 ; CHECK: %1 = sub i32 %0, %rem ; CHECK: %2 = lshr i32 %1, 2 -; CHECK: %3 = mul i32 %2, 4 +; CHECK: %3 = shl i32 %2, 2 ; CHECK: %4 = add i32 %rem, %3 ; CHECK: %5 = add i32 %4, 3 ; CHECK: br label %for.body Index: test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll =================================================================== --- test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll +++ test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll @@ -12,8 +12,8 @@ ; CHECK-LABEL: @test( ; multiplies are hoisted out of the loop ; CHECK: while.body.lr.ph: -; CHECK: mul i64 -; CHECK: mul i64 +; CHECK: shl i64 +; CHECK: shl i64 ; GEPs are ugly ; CHECK: while.body: ; CHECK: 
phi Index: test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll =================================================================== --- test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll +++ test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll @@ -6,7 +6,7 @@ ; CHECK: [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast ; CHECK: [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1 -; CHECK: [[r3:%[a-z0-9]+]] = mul i64 [[r2]], 2 +; CHECK: [[r3:%[a-z0-9]+]] = shl i64 [[r2]], 1 ; CHECK: br label %for.body ; CHECK: for.body: ; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r3]], %for.body.lr.ph ] Index: test/Transforms/LoopStrengthReduce/shl.ll =================================================================== --- /dev/null +++ test/Transforms/LoopStrengthReduce/shl.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -loop-reduce -gvn -S | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" + +define void @_Z3fooPfll(float* nocapture readonly %input, i64 %n, i64 %s) { +; CHECK-LABEL: @_Z3fooPfll( +entry: + %mul = shl nsw i64 %s, 2 +; CHECK: %mul = shl i64 %s, 2 + tail call void @_Z3bazl(i64 %mul) #2 +; CHECK-NEXT: call void @_Z3bazl(i64 %mul) + %cmp.5 = icmp sgt i64 %n, 0 + br i1 %cmp.5, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %i.06 = phi i64 [ %add, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds float, float* %input, i64 %i.06 +; LoopStrengthReduce should reuse %mul as the stride. 
+; CHECK: getelementptr i1, i1* {{[^,]+}}, i64 %mul + %0 = load float, float* %arrayidx, align 4 + tail call void @_Z3barf(float %0) #2 + %add = add nsw i64 %i.06, %s + %cmp = icmp slt i64 %add, %n + br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit +} + +declare void @_Z3bazl(i64) + +declare void @_Z3barf(float)