Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -3774,24 +3774,6 @@ } } -/// Check whether value has nuw/nsw/exact set but SCEV does not. -/// TODO: In reality it is better to check the poison recursevely -/// but this is better than nothing. -static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) { - if (auto *I = dyn_cast(V)) { - if (isa(I)) { - if (auto *NS = dyn_cast(S)) { - if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap()) - return true; - if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap()) - return true; - } - } else if (isa(I) && I->isExact()) - return true; - } - return false; -} - /// Return an existing SCEV if it exists, otherwise analyze the expression and /// create a new one. const SCEV *ScalarEvolution::getSCEV(Value *V) { @@ -3805,7 +3787,7 @@ // ValueExprMap before insert S->{V, 0} into ExprValueMap. std::pair Pair = ValueExprMap.insert({SCEVCallbackVH(V, this), S}); - if (Pair.second && !SCEVLostPoisonFlags(S, V)) { + if (Pair.second) { ExprValueMap[S].insert({V, nullptr}); // If S == Stripped + Offset, add Stripped -> {V, Offset} into Index: lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- lib/Analysis/ScalarEvolutionExpander.cpp +++ lib/Analysis/ScalarEvolutionExpander.cpp @@ -1688,9 +1688,28 @@ return V; } +/// Check whether value has nuw/nsw/exact set but SCEV does not. +/// TODO: In reality it is better to check the poison recursively +/// but this is better than nothing. 
+static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) { + if (auto *I = dyn_cast(V)) { + if (isa(I)) { + if (auto *NS = dyn_cast(S)) { + if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap()) + return true; + if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap()) + return true; + } + } else if (isa(I) && I->isExact()) + return true; + } + return false; +} + ScalarEvolution::ValueOffsetPair SCEVExpander::FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt) { + ScalarEvolution::ValueOffsetPair Candidate = { nullptr, nullptr }; SetVector *Set = SE.getSCEVValues(S); // If the expansion is not in CanonicalMode, and the SCEV contains any // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. @@ -1709,11 +1728,24 @@ EntInst->getFunction() == InsertPt->getFunction() && SE.DT.dominates(EntInst, InsertPt) && (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || - SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) - return {V, Offset}; + SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) { + if (!SCEVLostPoisonFlags(S, V)) + return { V, Offset }; + // If SCEV lost poison flags, we try to find a better V if we can. + // If we fail to do so then we will strip the poison flags from V. + // Ignore the current V if we already have a candidate. + if (!Candidate.first) + Candidate = { V, Offset }; + } } } } + if (Candidate.first) { + // We have a candidate and failed to find a better one, so we should + // strip poison flags from it because the SCEV does not have them. 
+ cast(Candidate.first)->dropPoisonGeneratingFlags(); + return Candidate; + } return {nullptr, nullptr}; } Index: test/Transforms/LoopStrengthReduce/duplicate-mul.ll =================================================================== --- /dev/null +++ test/Transforms/LoopStrengthReduce/duplicate-mul.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8m.main-arm-none-eabi" + +; Test checks that SCEV expander does not produce a new mul instruction. + +@pp280.i = external constant i32 +@pp281.i = external constant i32 + +; Function Attrs: norecurse nounwind +define hidden arm_aapcs_vfpcc void @main(i32 %argc, i8** %argv) local_unnamed_addr #0 { +entry: + %p280 = load i32, i32* @pp280.i, align 4 + %p281 = load i32, i32* @pp281.i, align 4 +; CHECK: = mul +; CHECK-NOT: = mul + %mul86.i = mul nuw nsw i32 %p280, %p281 + %cmp90.i = icmp eq i32 %mul86.i, 230416 + br i1 %cmp90.i, label %for.body.lr.ph.i, label %exit + +for.body.lr.ph.i: + %cmp104243.i = icmp eq i32 %mul86.i, 0 + br i1 %cmp104243.i, label %exit, label %for.body.us.preheader.i + +for.body.us.preheader.i: + %v14 = add nsw i32 %mul86.i, -1 + %xtraiter289.i = and i32 %mul86.i, 3 + %v15 = icmp ult i32 %v14, 3 + %unroll_iter293.i = sub nsw i32 %mul86.i, %xtraiter289.i + br label %for.body.us.i + +for.body.us.i: + %loop_cnt.0248.us.i = phi i32 [ %inc146.us.i, %for.cond103 ], [ 0, %for.body.us.preheader.i ] + br i1 %v15, label %for.cond103, label %for.body106.us.i + +for.body106.us.i: + %niter294.i = phi i32 [ %unroll_iter293.i, %for.body.us.i ], [ %niter294.nsub.3.i, %for.body106.us.i ] + %niter294.nsub.3.i = add i32 %niter294.i, -4 + %niter294.ncmp.3.i = icmp eq i32 %niter294.nsub.3.i, 0 + br i1 %niter294.ncmp.3.i, label %for.cond103, label %for.body106.us.i + +for.cond103: + %inc146.us.i = add nuw i32 %loop_cnt.0248.us.i, 1 + %cmp101.us.i = icmp ult i32 %inc146.us.i, %argc + br i1 %cmp101.us.i, label 
%for.body.us.i, label %exit + +exit: + ret void +} + + +attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m33" "target-features"="+d16,+dsp,+fp-armv8,+fp-only-sp,+hwdiv,+thumb-mode,-crc,-crypto,-dotprod,-fullfp16,-hwdiv-arm,-neon,-ras" "unsafe-fp-math"="false" "use-soft-float"="false" } +