Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -158,6 +158,38 @@
   if (CxtI && CxtI->getParent())
     return CxtI;
 
+  // If the value is a function argument, accept assumptions defined in the
+  // entry block if entering the function guarantees reaching them.
+  if (const Argument *Arg = dyn_cast<Argument>(V)) {
+    const Function *F = Arg->getParent();
+    if (!F || F->empty())
+      return nullptr;
+    const BasicBlock &Entry = F->getEntryBlock();
+    if (Entry.size() < 2) {
+      // Even if the only instruction is an assumption, it cannot be used as its
+      // own context.
+      return nullptr;
+    }
+
+    // First, to avoid potentially ephemeral values, try using the last
+    // instruction of the entry block as context.
+    const Instruction &Last = Entry.back();
+    bool MustReachLast = true;
+    for (auto I = Entry.begin(), E = Last.getIterator(); I != E; ++I) {
+      if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) {
+        MustReachLast = false;
+        break;
+      }
+    }
+    if (MustReachLast)
+      return &Last;
+
+    // Use the first instruction in the entry block as the context. Making sure
+    // that control flow reaches assumptions in the entry block will be done by
+    // isValidAssumeForContext().
+    return &Entry.front();
+  }
+
   return nullptr;
 }
 
Index: llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
===================================================================
--- llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
+++ llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
@@ -454,9 +454,9 @@
 ; CHECK-LABEL: 'test_guard_and_assume'
 ; CHECK-NEXT:  Classifying expressions for: @test_guard_and_assume
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: [0,4) S: [0,4) Exits: (-1 + %count) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<%loop> U: [0,4) S: [0,4) Exits: (-1 + %count) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %idx = getelementptr inbounds i32, i32* %data, i64 %iv
-; CHECK-NEXT:    --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add nuw i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<%loop> U: [1,5) S: [1,5) Exits: %count LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_guard_and_assume
Index: llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s
+
+; Make sure the loop is unrolled without a remainder loop based on an assumption
+; that the lower bits are known to be zero.
+
+define dso_local void @assumeDivisibleTC(i8* noalias nocapture %a, i8* noalias nocapture readonly %b, i32 %n) local_unnamed_addr {
+; CHECK-LABEL: @assumeDivisibleTC(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[N:%.*]], 3
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    [[CMP110:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP110]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC_3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I_011]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[B:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP0]], 3
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    store i8 [[ADD]], i8* [[ARRAYIDX4]], align 1
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[I_011]], 1
+; CHECK-NEXT:    [[IDXPROM_1:%.*]] = zext i32 [[INC]] to i64
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[B]], i64 [[IDXPROM_1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[ARRAYIDX_1]], align 1
+; CHECK-NEXT:    [[ADD_1:%.*]] = add i8 [[TMP1]], 3
+; CHECK-NEXT:    [[ARRAYIDX4_1:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[IDXPROM_1]]
+; CHECK-NEXT:    store i8 [[ADD_1]], i8* [[ARRAYIDX4_1]], align 1
+; CHECK-NEXT:    [[INC_1:%.*]] = add nuw nsw i32 [[INC]], 1
+; CHECK-NEXT:    [[IDXPROM_2:%.*]] = zext i32 [[INC_1]] to i64
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[B]], i64 [[IDXPROM_2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX_2]], align 1
+; CHECK-NEXT:    [[ADD_2:%.*]] = add i8 [[TMP2]], 3
+; CHECK-NEXT:    [[ARRAYIDX4_2:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[IDXPROM_2]]
+; CHECK-NEXT:    store i8 [[ADD_2]], i8* [[ARRAYIDX4_2]], align 1
+; CHECK-NEXT:    [[INC_2:%.*]] = add nuw nsw i32 [[INC_1]], 1
+; CHECK-NEXT:    [[IDXPROM_3:%.*]] = zext i32 [[INC_2]] to i64
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[B]], i64 [[IDXPROM_3]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, i8* [[ARRAYIDX_3]], align 1
+; CHECK-NEXT:    [[ADD_3:%.*]] = add i8 [[TMP3]], 3
+; CHECK-NEXT:    [[ARRAYIDX4_3:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[IDXPROM_3]]
+; CHECK-NEXT:    store i8 [[ADD_3]], i8* [[ARRAYIDX4_3]], align 1
+; CHECK-NEXT:    [[INC_3]] = add nuw nsw i32 [[INC_2]], 1
+; CHECK-NEXT:    [[CMP1_3:%.*]] = icmp slt i32 [[INC_3]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP1_3]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], [[LOOP0:!llvm.loop !.*]]
+;
+entry:
+  %and = and i32 %n, 3
+  %cmp = icmp eq i32 %and, 0
+  tail call void @llvm.assume(i1 %cmp)
+  %cmp110 = icmp sgt i32 %n, 0
+  br i1 %cmp110, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %idxprom = zext i32 %i.011 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %b, i64 %idxprom
+  %0 = load i8, i8* %arrayidx, align 1
+  %add = add i8 %0, 3
+  %arrayidx4 = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 %add, i8* %arrayidx4, align 1
+  %inc = add nuw nsw i32 %i.011, 1
+  %cmp1 = icmp slt i32 %inc, %n
+  br i1 %cmp1, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+}
+
+declare void @llvm.assume(i1 noundef) nofree nosync nounwind willreturn
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.mustprogress"}
+!2 = !{!"llvm.loop.unroll.count", i32 4}
Index: llvm/test/Transforms/SimplifyCFG/pr46638.ll
===================================================================
--- llvm/test/Transforms/SimplifyCFG/pr46638.ll
+++ llvm/test/Transforms/SimplifyCFG/pr46638.ll
@@ -3,7 +3,7 @@
 
 define void @pr46638(i1 %c, i32 %x) {
 ; CHECK-LABEL: @pr46638(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK:         [[CMP1:%.*]] = icmp slt i32 [[X:%.*]], 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP1]])
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[TRUE2_CRITEDGE:%.*]], label [[FALSE1:%.*]]
 ; CHECK:       false1:
@@ -19,6 +19,15 @@
 ; CHECK-NEXT:    call void @dummy(i32 2)
 ; CHECK-NEXT:    ret void
 ;
+entry:
+  %cmp0 = icmp sgt i32 %x, -333
+  br i1 %cmp0, label %start, label %skip
+
+skip:
+  call void @dummy(i32 999)
+  ret void
+
+start:
   %cmp1 = icmp slt i32 %x, 0
   call void @llvm.assume(i1 %cmp1)
   br i1 %c, label %true1, label %false1
Index: llvm/unittests/Analysis/ValueTrackingTest.cpp
===================================================================
--- llvm/unittests/Analysis/ValueTrackingTest.cpp
+++ llvm/unittests/Analysis/ValueTrackingTest.cpp
@@ -1488,6 +1488,65 @@
   EXPECT_EQ(Known.getMaxValue(), 575);
 }
 
+TEST_F(ValueTrackingTest, ComputeKnownBitsArgNoCxtI) {
+  // Take advantage of assumptions on arguments w/o a context.
+  parseAssembly(
+      "define void @test(i32 %x) {\n"
+      " %A = and i32 %x, 31\n"
+      " %c = icmp eq i32 %A, 0\n"
+      " call void @llvm.assume(i1 %c)\n"
+      " ret void\n"
+      "}\n"
+      "declare void @llvm.assume(i1)\n");
+  AssumptionCache AC(*F);
+  KnownBits Known = computeKnownBits(
+      F->getArg(0), M->getDataLayout(), /* Depth */ 0, &AC, nullptr);
+  EXPECT_EQ(Known.Zero.getZExtValue(), 31u);
+  EXPECT_EQ(Known.One.getZExtValue(), 0u);
+}
+
+TEST_F(ValueTrackingTest, ComputeKnownBitsArgNoCxtIFront) {
+  // The last instruction may be unreachable; the entry's front is the context.
+  parseAssembly(
+      "define void @test(i32 %x) {\n"
+      " %e = mul i32 %x, 7\n"
+      " %A = and i32 %x, 31\n"
+      " %c = icmp eq i32 %A, 0\n"
+      " call void @llvm.assume(i1 %c)\n"
+      " call void @may.not.transfer.execution.to.successor()\n"
+      " ret void\n"
+      "}\n"
+      "declare void @llvm.assume(i1)\n"
+      "declare void @may.not.transfer.execution.to.successor()\n");
+  AssumptionCache AC(*F);
+  KnownBits Known = computeKnownBits(
+      F->getArg(0), M->getDataLayout(), /* Depth */ 0, &AC, nullptr);
+  EXPECT_EQ(Known.Zero.getZExtValue(), 31u);
+  EXPECT_EQ(Known.One.getZExtValue(), 0u);
+}
+
+TEST_F(ValueTrackingTest, ComputeKnownBitsArgNoCxtIInvalid) {
+  // Do not take advantage of assumptions on arguments w/o a context if control
+  // is not guaranteed to reach them.
+  parseAssembly(
+      "define void @test(i32 %x) {\n"
+      " %e = mul i32 %x, 7\n"
+      " %A = and i32 %x, 31\n"
+      " %c = icmp eq i32 %A, 0\n"
+      " call void @may.not.transfer.execution.to.successor()\n"
+      " call void @llvm.assume(i1 %c)\n"
+      " call void @may.not.transfer.execution.to.successor()\n"
+      " ret void\n"
+      "}\n"
+      "declare void @llvm.assume(i1)\n"
+      "declare void @may.not.transfer.execution.to.successor()\n");
+  AssumptionCache AC(*F);
+  KnownBits Known = computeKnownBits(
+      F->getArg(0), M->getDataLayout(), /* Depth */ 0, &AC, nullptr);
+  EXPECT_EQ(Known.Zero.getZExtValue(), 0u);
+  EXPECT_EQ(Known.One.getZExtValue(), 0u);
+}
+
 class IsBytewiseValueTest : public ValueTrackingTest,
                             public ::testing::WithParamInterface<
                                 std::pair> {