diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -1173,6 +1173,11 @@ if (ChangedLoop) SE->forgetLoop(L); + // The insertion point for the widening should be at the widenably call, not + // at the WidenableBR. If we do this at the widenableBR, we can incorrectly + // change a loop-invariant condition to a loop-varying one. + auto *IP = cast(WidenableBR->getCondition()); + // The use of umin(all analyzeable exits) instead of latch is subtle, but // important for profitability. We may have a loop which hasn't been fully // canonicalized just yet. If the exit we chose to widen is provably never @@ -1182,21 +1187,9 @@ const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L); if (isa(MinEC) || MinEC->getType()->isPointerTy() || !SE->isLoopInvariant(MinEC, L) || - !Rewriter.isSafeToExpandAt(MinEC, WidenableBR)) + !Rewriter.isSafeToExpandAt(MinEC, IP)) return ChangedLoop; - // Subtlety: We need to avoid inserting additional uses of the WC. We know - // that it can only have one transitive use at the moment, and thus moving - // that use to just before the branch and inserting code before it and then - // modifying the operand is legal. - auto *IP = cast(WidenableBR->getCondition()); - // Here we unconditionally modify the IR, so after this point we should return - // only `true`! - IP->moveBefore(WidenableBR); - if (MSSAU) - if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(IP)) - MSSAU->moveToPlace(MUD, WidenableBR->getParent(), - MemorySSA::BeforeTerminator); Rewriter.setInsertPoint(IP); IRBuilder<> B(IP); diff --git a/llvm/test/Transforms/LoopPredication/pr61963.ll b/llvm/test/Transforms/LoopPredication/pr61963.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopPredication/pr61963.ll @@ -0,0 +1,60 @@ +; RUN: opt -S -passes=loop-predication < %s 2>&1 | FileCheck %s + +; Do not convert branch in loop_outer block on the widenable_cond11 to a +; loop-varying one. +; It will result in a miscompile. +; deopt9 will have incorrect deopt state (it currently uses init_val because +; indvars identified that if that exit is taken, it will be taken on first +; iteration, since widenable_cond11 is a loop-invariant condition). +; CHECK-LABEL: foo +; CHECK-LABEL: loop_outer +; CHECK: br i1 %widenable_cond11, label %inner_loop_ph, label %deopt9 +define i32 @foo(ptr addrspace(1) %arg) { +entry: + %init_val = load i32, ptr addrspace(1) %arg, align 4 + %widenable_cond11 = call i1 @llvm.experimental.widenable.condition() + br label %loop_outer + +loop_outer: ; preds = %outer_loop_latch, %entry + %iv = phi i32 [ %phi36, %outer_loop_latch ], [ 42, %entry ] + %phi21 = phi i32 [ %add39, %outer_loop_latch ], [ %init_val, %entry ] + %add27 = add i32 %iv, 1 + %icmp28 = icmp eq i32 %add27, 60 + br i1 %widenable_cond11, label %inner_loop_ph, label %deopt9 + +inner_loop_ph: ; preds = %loop_outer + store atomic i32 606, ptr addrspace(1) %arg unordered, align 4 + br label %inner_loop + +inner_loop: ; preds = %inner_loop_latch, %inner_loop_ph + %phi43 = phi i32 [ 1, %inner_loop_ph ], [ %add55, %inner_loop_latch ] + %phi44 = phi i32 [ %add27, %inner_loop_ph ], [ %add48, %inner_loop_latch ] + %add48 = add i32 %phi44, 1 + %icmp49 = icmp eq i32 %add48, 0 + br i1 %icmp49, label %deopt57, label %inner_loop_latch + +inner_loop_latch: ; preds = %inner_loop + store atomic i32 606, ptr addrspace(1) %arg unordered, align 4 + %add55 = add nuw nsw i32 %phi43, 1 + %exitcond = icmp eq i32 %add55, 10 + br i1 %exitcond, label %outer_loop_latch, label %inner_loop + +outer_loop_latch: ; preds = %inner_loop_latch + %phi36 = phi i32 [ %add48, %inner_loop_latch ] + %add39 = add i32 %phi21, 1 + br label %loop_outer + +deopt9: ; preds = %loop_outer + %lcssa = phi i32 [ %init_val, %loop_outer ] + %call53 = call i32 (...) @llvm.experimental.deoptimize.i32(i32 13) [ "deopt"(i32 0, i32 1, i32 1291853205, i32 127, i32 3, i32 13, i32 0, i32 0, i32 8, i32 3, i32 1, i32 3, i32 606, i32 7, ptr null, i32 7, ptr null, i32 3, i32 52, i32 7, ptr null, i32 3, i32 36, i32 7, ptr null, i32 7, ptr null, i32 3, i32 1, i32 3, i32 0, i32 0, i32 8, i32 3, i32 %lcssa, i32 3, i32 42, i32 7, ptr null) ] + ret i32 %call53 + +deopt57: ; preds = %inner_loop + %call62 = call i32 (...) @llvm.experimental.deoptimize.i32(i32 12) [ "deopt"(i32 0, i32 1, i32 1291853205, i32 99, i32 2, i32 13, i32 0, i32 3, i32 0, i32 3, i32 0, i32 7, ptr null, i32 7, ptr null, i32 3, i32 0, i32 7, ptr null, i32 3, i32 24, i32 7, ptr null, i32 7, ptr null, i32 3, i32 1, i32 3, i32 60, i32 0, i32 8, i32 3, i32 59, i32 3, i32 poison, i32 7, ptr null) ] + ret i32 %call62 +} + +declare i32 @llvm.experimental.deoptimize.i32(...) + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) +declare noundef i1 @llvm.experimental.widenable.condition()