Index: include/llvm/Transforms/Scalar/Reassociate.h =================================================================== --- include/llvm/Transforms/Scalar/Reassociate.h +++ include/llvm/Transforms/Scalar/Reassociate.h @@ -25,6 +25,7 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" @@ -56,13 +57,16 @@ /// Reassociate commutative expressions. class ReassociatePass : public PassInfoMixin { + LoopInfo *LI; + ScalarEvolution *SE; DenseMap RankMap; DenseMap, unsigned> ValueRankMap; SetVector> RedoInsts; bool MadeChange; public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + PreservedAnalyses runImpl(Function &F, LoopInfo *LI_, ScalarEvolution *SE_); private: void BuildRankMap(Function &F); Index: lib/Transforms/Scalar/Reassociate.cpp =================================================================== --- lib/Transforms/Scalar/Reassociate.cpp +++ lib/Transforms/Scalar/Reassociate.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -1969,6 +1970,16 @@ if (!isa(I)) return; + // If I has an NSW/NUW flag and is potentially an IV use, don't reassociate + // it, so that the NSW/NUW flag is kept and IndVar can do better IV-use + // widening. LSR can still reassociate IV uses later, so the benefit of + // reassociation is not lost. 
+ Loop *Loop = LI->getLoopFor(I->getParent()); + if (Loop && Loop->isLoopSimplifyForm() && isa(I) && + (I->hasNoSignedWrap() || I->hasNoUnsignedWrap()) && + SE->isSCEVable(I->getType()) && isa(SE->getSCEV(I))) + return; + if (I->getOpcode() == Instruction::Shl && isa(I->getOperand(1))) // If an operand of this shift is a reassociable multiply, or if the shift // is used by a reassociable multiply or add, turn into a multiply. @@ -2173,7 +2184,17 @@ RewriteExprTree(I, Ops); } -PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) { +PreservedAnalyses ReassociatePass::run(Function &F, + FunctionAnalysisManager &AM) { + return runImpl(F, &AM.getResult(F), + &AM.getResult(F)); +} + +PreservedAnalyses ReassociatePass::runImpl(Function &F, LoopInfo *LI_, + ScalarEvolution *SE_) { + LI = LI_; + SE = SE_; + // Calculate the rank map for F. BuildRankMap(F); @@ -2240,12 +2261,16 @@ if (skipFunction(F)) return false; - FunctionAnalysisManager DummyFAM; - auto PA = Impl.run(F, DummyFAM); + auto PA = + Impl.runImpl(F, &getAnalysis().getLoopInfo(), + &getAnalysis().getSE()); return !PA.areAllPreserved(); } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesCFG(); AU.addPreserved(); } @@ -2253,8 +2278,12 @@ } char ReassociateLegacyPass::ID = 0; -INITIALIZE_PASS(ReassociateLegacyPass, "reassociate", - "Reassociate expressions", false, false) +INITIALIZE_PASS_BEGIN(ReassociateLegacyPass, "reassociate", + "Reassociate expressions", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_END(ReassociateLegacyPass, "reassociate", + "Reassociate expressions", false, false) // Public interface to the Reassociate pass FunctionPass *llvm::createReassociatePass() { Index: test/Transforms/Reassociate/ivuse.ll =================================================================== --- test/Transforms/Reassociate/ivuse.ll +++ 
test/Transforms/Reassociate/ivuse.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -S -reassociate | FileCheck %s +; RUN: opt < %s -passes='reassociate' -S | FileCheck %s +; Check that %add and %sub1 are not reassociated, so their nsw flags are kept. Keeping the flags lets IndVar do better IV-use widening. + +define void @foo(i32 %size) { +entry: + br label %for.cond + +for.cond: + %i.0 = phi i32 [ 1, %entry ], [ %inc, %for.body ] + %sub = add nsw i32 %size, -1 + %cmp = icmp slt i32 %i.0, %sub + br i1 %cmp, label %for.body, label %for.end + +; CHECK-LABEL: @foo +; CHECK: for.body: +; CHECK-NEXT: %add = add nsw i32 %i.0, %size +; CHECK-NEXT: %sub1 = add nsw i32 %add, -1 + +for.body: + %add = add nsw i32 %i.0, %size + %sub1 = add nsw i32 %add, -1 + %idxprom = sext i32 %sub1 to i64 + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* @maxarray, i64 0, i64 %idxprom + %tmp0 = load i32, i32* %arrayidx, align 4 + %tmp1 = load i32, i32* @total, align 4 + %add2 = add nsw i32 %tmp1, %tmp0 + store i32 %add2, i32* @total, align 4 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: + ret void +} + +@maxarray = common local_unnamed_addr global [1000 x i32] zeroinitializer, align 16 +@total = common local_unnamed_addr global i32 0, align 4 +