diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -343,6 +343,11 @@ Optional<ScalarEvolution::ValueOffsetPair> getRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L); + /// Returns a suitable insert point after \p I, that dominates \p + /// MustDominate. Skips instructions inserted by the expander. + BasicBlock::iterator findInsertPointAfter(Instruction *I, + Instruction *MustDominate); + private: LLVMContext &getContext() const { return SE.getContext(); } diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -39,8 +39,7 @@ using namespace PatternMatch; /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, -/// reusing an existing cast if a suitable one exists, moving an existing -/// cast if a suitable one exists but isn't in the right place, or +/// reusing an existing cast if a suitable one (= dominating IP) exists, or /// creating a new one. Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Instruction::CastOps Op, @@ -59,40 +58,37 @@ Instruction *Ret = nullptr; // Check to see if there is already a cast! - for (User *U : V->users()) - if (U->getType() == Ty) - if (CastInst *CI = dyn_cast<CastInst>(U)) - if (CI->getOpcode() == Op) { - // If the cast isn't where we want it, create a new cast at IP. - // Likewise, do not reuse a cast at BIP because it must dominate - // instructions that might be inserted before BIP. - if (BasicBlock::iterator(CI) != IP || BIP == IP) { - // Create a new cast, and leave the old cast in place in case - // it is being used as an insert point.
- Ret = CastInst::Create(Op, V, Ty, "", &*IP); - Ret->takeName(CI); - CI->replaceAllUsesWith(Ret); - break; - } - Ret = CI; - break; - } + for (User *U : V->users()) { + if (U->getType() != Ty) + continue; + CastInst *CI = dyn_cast<CastInst>(U); + if (!CI || CI->getOpcode() != Op) + continue; + + // Found a suitable cast that is at IP or comes before IP. Use it. + if (IP->getParent() == CI->getParent() && + (&*IP == CI || CI->comesBefore(&*IP))) { + Ret = CI; + break; + } + } // Create a new cast. - if (!Ret) + if (!Ret) { Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP); + rememberInstruction(Ret); + } // We assert at the end of the function since IP might point to an // instruction with different dominance properties than a cast // (an invoke for example) and not dominate BIP (but the cast does). assert(SE.DT.dominates(Ret, &*BIP)); - rememberInstruction(Ret); return Ret; } -static BasicBlock::iterator findInsertPointAfter(Instruction *I, - BasicBlock *MustDominate) { +BasicBlock::iterator +SCEVExpander::findInsertPointAfter(Instruction *I, Instruction *MustDominate) { BasicBlock::iterator IP = ++I->getIterator(); if (auto *II = dyn_cast<InvokeInst>(I)) IP = II->getNormalDest()->begin(); @@ -103,11 +99,17 @@ if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) { ++IP; } else if (isa<CatchSwitchInst>(IP)) { - IP = MustDominate->getFirstInsertionPt(); + IP = MustDominate->getParent()->getFirstInsertionPt(); } else { assert(!IP->isEHPad() && "unexpected eh pad!"); } + // Adjust insert point to be after instructions inserted by the expander, so + // we can re-use already inserted instructions. Avoid skipping past the + // original \p MustDominate, in case it is an inserted instruction. + while (isInsertedInstruction(&*IP) && &*IP != MustDominate) + ++IP; + return IP; } @@ -167,7 +169,7 @@ // Cast the instruction immediately after the instruction.
Instruction *I = cast<Instruction>(V); - BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock()); + BasicBlock::iterator IP = findInsertPointAfter(I, &*Builder.GetInsertPoint()); return ReuseOrCreateCast(I, Ty, Op, IP); } @@ -1526,7 +1528,7 @@ Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock::iterator NewInsertPt = - findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock()); + findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint()); V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, &*NewInsertPt, false); return V; diff --git a/llvm/test/Transforms/LoopDistribute/bounds-expansion-bug.ll b/llvm/test/Transforms/LoopDistribute/bounds-expansion-bug.ll --- a/llvm/test/Transforms/LoopDistribute/bounds-expansion-bug.ll +++ b/llvm/test/Transforms/LoopDistribute/bounds-expansion-bug.ll @@ -69,12 +69,10 @@ br label %for.body ; CHECK: join -; CHECK: {{%[0-9a-z]+}} = bitcast i32* %c to i8* ; CHECK: {{%[0-9a-z]+}} = bitcast i32* %a to i8* -; CHECK: [[OLD_C:%[0-9a-z]+]] = bitcast i32* %c to i8* -; CHECK: [[OLD_A:%[0-9a-z]+]] = bitcast i32* %a to i8* -; CHECK-NOT: [[OLD_C]] -; CHECK-NOT: [[OLD_A]] +; CHECK: {{%[0-9a-z]+}} = bitcast i32* %c to i8* +; CHECK-NOT: bitcast i32* %c to i8* +; CHECK-NOT: bitcast i32* %a to i8* for.body: ; preds = %for.body, %entry %ind = phi i64 [ 0, %join ], [ %add, %for.body ] diff --git a/llvm/test/Transforms/LoopIdiom/reuse-cast.ll b/llvm/test/Transforms/LoopIdiom/reuse-cast.ll --- a/llvm/test/Transforms/LoopIdiom/reuse-cast.ll +++ b/llvm/test/Transforms/LoopIdiom/reuse-cast.ll @@ -83,14 +83,14 @@ define void @reuse_cast_2(i32 %x, i32* %ptr.1.start) { ; CHECK-LABEL: @reuse_cast_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTR_1_START1:%.*]] = bitcast i32* [[PTR_1_START:%.*]] to i8* +; CHECK-NEXT: [[PTR_1_START2:%.*]] = bitcast i32* [[PTR_1_START:%.*]] to i8* ; CHECK-NEXT: [[STACK:%.*]] = alloca [2 x i32], align 4 -; CHECK-NEXT: [[CAST_TO_REUSE:%.*]] = bitcast [2 x i32]*
[[STACK]] to i8* +; CHECK-NEXT: [[STACK1:%.*]] = bitcast [2 x i32]* [[STACK]] to i8* ; CHECK-NEXT: [[C_0:%.*]] = icmp sgt i32 [[X:%.*]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[STACK]] to i8* +; CHECK-NEXT: [[CAST_TO_REUSE:%.*]] = bitcast [2 x i32]* [[STACK]] to i8* ; CHECK-NEXT: [[PTR_2_START:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[STACK]], i64 0, i64 0 ; CHECK-NEXT: call void @use.i8(i8* [[CAST_TO_REUSE]]) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[CAST_TO_REUSE]], i8* align 4 [[PTR_1_START1]], i64 8, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[STACK1]], i8* align 4 [[PTR_1_START2]], i64 8, i1 false) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr27056.ll b/llvm/test/Transforms/LoopStrengthReduce/pr27056.ll --- a/llvm/test/Transforms/LoopStrengthReduce/pr27056.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/pr27056.ll @@ -37,7 +37,7 @@ ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[LSR_IV]] to %struct.L* -; CHECK-NEXT: [[CMP:%.*]] = icmp eq %struct.L* [[UGLYGEP1]], [[TMP]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq %struct.L* [[UGLYGEP1]], [[TMP5]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]]