diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -2543,6 +2543,9 @@ CallInst *CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue, Value *Alignment, Value *OffsetValue = nullptr); + /// Create an assume intrinsic call that represents a nonnull + /// assumption on the provided pointer. + CallInst *CreateNonNullAssumption(Value *PtrValue); }; /// This provides a uniform API for creating instructions and inserting diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -1732,7 +1732,7 @@ // parameter attribute, but that's a less interesting case because hopefully // the callee would already have been simplified based on that. if (Argument *A = dyn_cast<Argument>(V)) - if (paramHasAttr(A, Attribute::NonNull)) + if (paramHasAttr(A, Attribute::NonNull) || paramHasAttr(A, Attribute::Dereferenceable)) return true; // Is this an alloca in the caller? 
This is distinct from the attribute case diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -1410,6 +1410,12 @@ return CreateAlignmentAssumptionHelper(DL, PtrValue, Alignment, OffsetValue); } +CallInst *IRBuilderBase::CreateNonNullAssumption(Value *PtrValue) { + Value *Vals[] = { PtrValue }; + OperandBundleDefT<Value *> NonNullOpB("nonnull", Vals); + return CreateAssumption(ConstantInt::getTrue(getContext()), {NonNullOpB}); +} + IRBuilderDefaultInserter::~IRBuilderDefaultInserter() = default; IRBuilderCallbackInserter::~IRBuilderCallbackInserter() = default; IRBuilderFolder::~IRBuilderFolder() = default; diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1460,6 +1460,25 @@ } } +static void AddAssumptionsFromCallSiteAttrs(CallBase &CB, InlineFunctionInfo &IFI) { + if (!IFI.GetAssumptionCache) + return; + + AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller()); + auto &DL = CB.getCaller()->getParent()->getDataLayout(); + Function *CalledFunc = CB.getCalledFunction(); + IRBuilder<> Builder(&CB); + + for (Argument &Arg : CalledFunc->args()) { + unsigned ArgNo = Arg.getArgNo(); + auto *ArgVal = CB.getArgOperand(ArgNo); + if (CB.getAttributes().hasParamAttr(ArgNo, Attribute::NonNull) && !isKnownNonZero(ArgVal, DL, 0, AC)) { + CallInst *NewAsmp = Builder.CreateNonNullAssumption(CB.getArgOperand(ArgNo)); + AC->registerAssumption(cast<AssumeInst>(NewAsmp)); + } + } +} + static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src, Module *M, BasicBlock *InsertBlock, InlineFunctionInfo &IFI, @@ -2130,6 +2149,8 @@ VMap[&*I] = ActualArg; } + AddAssumptionsFromCallSiteAttrs(CB, IFI); + // TODO: Remove this when users have been updated to the assume bundles. // Add alignment assumptions if necessary. 
We do this before the inlined // instructions are actually cloned into the caller so that we can easily diff --git a/llvm/test/Transforms/Coroutines/ex0.ll b/llvm/test/Transforms/Coroutines/ex0.ll --- a/llvm/test/Transforms/Coroutines/ex0.ll +++ b/llvm/test/Transforms/Coroutines/ex0.ll @@ -1,7 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; First example from Doc/Coroutines.rst (two block loop) ; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s define ptr @f(i32 %n) presplitcoroutine { +; CHECK-LABEL: define noalias nonnull ptr @f +; CHECK-SAME: (i32 [[N:%.*]]) local_unnamed_addr { +; CHECK-NEXT: AfterCoroEnd: +; CHECK-NEXT: [[ALLOC:%.*]] = tail call ptr @malloc(i32 24) +; CHECK-NEXT: store ptr @f.resume, ptr [[ALLOC]], align 8 +; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], ptr [[ALLOC]], i64 0, i32 1 +; CHECK-NEXT: store ptr @f.destroy, ptr [[DESTROY_ADDR]], align 8 +; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], ptr [[ALLOC]], i64 0, i32 2 +; CHECK-NEXT: store i32 [[N]], ptr [[N_VAL_SPILL_ADDR]], align 4 +; CHECK-NEXT: tail call void @print(i32 [[N]]) +; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[F_FRAME]], ptr [[ALLOC]], i64 0, i32 3 +; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR1]], align 1 +; CHECK-NEXT: ret ptr [[ALLOC]] +; entry: %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) %size = call i32 @llvm.coro.size.i32() @@ -13,8 +28,8 @@ %n.val = phi i32 [ %n, %entry ], [ %inc, %resume ] call void @print(i32 %n.val) %0 = call i8 @llvm.coro.suspend(token none, i1 false) - switch i8 %0, label %suspend [i8 0, label %resume - i8 1, label %cleanup] + switch i8 %0, label %suspend [i8 0, label %resume + i8 1, label %cleanup] resume: %inc = add i32 %n.val, 1 br label %loop @@ -24,23 +39,26 @@ call void @free(ptr %mem) br label %suspend 
suspend: - call i1 @llvm.coro.end(ptr %hdl, i1 0) + call i1 @llvm.coro.end(ptr %hdl, i1 0) ret ptr %hdl } -; CHECK-LABEL: @main( define i32 @main() { +; CHECK-LABEL: define i32 @main() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOC_I:%.*]] = tail call ptr @malloc(i32 24) +; CHECK-NEXT: tail call void @print(i32 4) +; CHECK-NEXT: tail call void @print(i32 5) +; CHECK-NEXT: tail call void @print(i32 6) +; CHECK-NEXT: tail call void @free(ptr nonnull [[ALLOC_I]]) +; CHECK-NEXT: ret i32 0 +; entry: %hdl = call ptr @f(i32 4) call void @llvm.coro.resume(ptr %hdl) call void @llvm.coro.resume(ptr %hdl) call void @llvm.coro.destroy(ptr %hdl) ret i32 0 -; CHECK: entry: -; CHECK: call void @print(i32 4) -; CHECK: call void @print(i32 5) -; CHECK: call void @print(i32 6) -; CHECK: ret i32 0 } declare token @llvm.coro.id(i32, ptr, ptr, ptr) @@ -50,9 +68,9 @@ declare i8 @llvm.coro.suspend(token, i1) declare void @llvm.coro.resume(ptr) declare void @llvm.coro.destroy(ptr) - + declare ptr @llvm.coro.begin(token, ptr) -declare i1 @llvm.coro.end(ptr, i1) +declare i1 @llvm.coro.end(ptr, i1) declare noalias ptr @malloc(i32) declare void @print(i32) diff --git a/llvm/test/Transforms/Coroutines/ex1.ll b/llvm/test/Transforms/Coroutines/ex1.ll --- a/llvm/test/Transforms/Coroutines/ex1.ll +++ b/llvm/test/Transforms/Coroutines/ex1.ll @@ -1,7 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; First example from Doc/Coroutines.rst (one block loop) ; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s define ptr @f(i32 %n) presplitcoroutine { +; CHECK-LABEL: define noalias nonnull ptr @f +; CHECK-SAME: (i32 [[N:%.*]]) local_unnamed_addr { +; CHECK-NEXT: AfterCoroEnd: +; CHECK-NEXT: [[ALLOC:%.*]] = tail call ptr @malloc(i32 24) +; CHECK-NEXT: store ptr @f.resume, ptr [[ALLOC]], align 8 +; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds 
[[F_FRAME:%.*]], ptr [[ALLOC]], i64 0, i32 1 +; CHECK-NEXT: store ptr @f.destroy, ptr [[DESTROY_ADDR]], align 8 +; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], ptr [[ALLOC]], i64 0, i32 2 +; CHECK-NEXT: store i32 [[N]], ptr [[N_VAL_SPILL_ADDR]], align 4 +; CHECK-NEXT: tail call void @print(i32 [[N]]) +; CHECK-NEXT: [[INDEX_ADDR2:%.*]] = getelementptr inbounds [[F_FRAME]], ptr [[ALLOC]], i64 0, i32 3 +; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR2]], align 1 +; CHECK-NEXT: ret ptr [[ALLOC]] +; entry: %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) %size = call i32 @llvm.coro.size.i32() @@ -14,7 +29,7 @@ call void @print(i32 %n.val) %0 = call i8 @llvm.coro.suspend(token none, i1 false) switch i8 %0, label %suspend [i8 0, label %loop - i8 1, label %cleanup] + i8 1, label %cleanup] cleanup: %mem = call ptr @llvm.coro.free(token %id, ptr %hdl) call void @free(ptr %mem) @@ -24,19 +39,22 @@ ret ptr %hdl } -; CHECK-LABEL: @main( define i32 @main() { +; CHECK-LABEL: define i32 @main() local_unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOC_I:%.*]] = tail call ptr @malloc(i32 24) +; CHECK-NEXT: tail call void @print(i32 4) +; CHECK-NEXT: tail call void @print(i32 5) +; CHECK-NEXT: tail call void @print(i32 6) +; CHECK-NEXT: tail call void @free(ptr nonnull [[ALLOC_I]]) +; CHECK-NEXT: ret i32 0 +; entry: %hdl = call ptr @f(i32 4) call void @llvm.coro.resume(ptr %hdl) call void @llvm.coro.resume(ptr %hdl) call void @llvm.coro.destroy(ptr %hdl) ret i32 0 -; CHECK-NEXT: entry: -; CHECK: call void @print(i32 4) -; CHECK: call void @print(i32 5) -; CHECK: call void @print(i32 6) -; CHECK: ret i32 0 } declare ptr @malloc(i32) diff --git a/llvm/test/Transforms/Coroutines/ex5.ll b/llvm/test/Transforms/Coroutines/ex5.ll --- a/llvm/test/Transforms/Coroutines/ex5.ll +++ b/llvm/test/Transforms/Coroutines/ex5.ll @@ -1,7 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; 
Fifth example from Doc/Coroutines.rst (final suspend) ; RUN: opt < %s -aa-pipeline=basic-aa -passes='default<O2>' -preserve-alignment-assumptions-during-inlining=false -S | FileCheck %s define ptr @f(i32 %n) presplitcoroutine { +; CHECK-LABEL: define noalias nonnull ptr @f +; CHECK-SAME: (i32 [[N:%.*]]) local_unnamed_addr { +; CHECK-NEXT: while.cond: +; CHECK-NEXT: [[ALLOC:%.*]] = tail call ptr @malloc(i32 24) +; CHECK-NEXT: store ptr @f.resume, ptr [[ALLOC]], align 8 +; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[F_FRAME:%.*]], ptr [[ALLOC]], i64 0, i32 1 +; CHECK-NEXT: store ptr @f.destroy, ptr [[DESTROY_ADDR]], align 8 +; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = getelementptr inbounds [[F_FRAME]], ptr [[ALLOC]], i64 0, i32 2 +; CHECK-NEXT: store i32 [[N]], ptr [[N_VAL_SPILL_ADDR]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[AFTERCOROSUSPEND:%.*]], label [[AFTERCOROSUSPEND3:%.*]] +; CHECK: AfterCoroSuspend: +; CHECK-NEXT: tail call void @print(i32 [[N]]) +; CHECK-NEXT: [[INDEX_ADDR5:%.*]] = getelementptr inbounds [[F_FRAME]], ptr [[ALLOC]], i64 0, i32 3 +; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR5]], align 1 +; CHECK-NEXT: br label [[AFTERCOROEND:%.*]] +; CHECK: AfterCoroSuspend3: +; CHECK-NEXT: store ptr null, ptr [[ALLOC]], align 8 +; CHECK-NEXT: br label [[AFTERCOROEND]] +; CHECK: AfterCoroEnd: +; CHECK-NEXT: ret ptr [[ALLOC]] +; entry: %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) %size = call i32 @llvm.coro.size.i32() @@ -18,12 +41,12 @@ call void @print(i32 %n.val) #4 %s = call i8 @llvm.coro.suspend(token none, i1 false) switch i8 %s, label %suspend [i8 0, label %while.cond - i8 1, label %cleanup] + i8 1, label %cleanup] while.end: %s.final = call i8 @llvm.coro.suspend(token none, i1 true) switch i8 %s.final, label %suspend [i8 0, label %trap - i8 1, label %cleanup] -trap: + i8 1, label %cleanup] +trap: call void @llvm.trap() unreachable cleanup: @@ -48,8 +71,17 @@ 
declare ptr @llvm.coro.free(token, ptr) declare i1 @llvm.coro.end(ptr, i1) -; CHECK-LABEL: @main define i32 @main() { +; CHECK-LABEL: define i32 @main() local_unnamed_addr { +; CHECK-NEXT: end: +; CHECK-NEXT: [[ALLOC_I:%.*]] = tail call ptr @malloc(i32 24) +; CHECK-NEXT: tail call void @print(i32 4) +; CHECK-NEXT: tail call void @print(i32 3) +; CHECK-NEXT: tail call void @print(i32 2) +; CHECK-NEXT: tail call void @print(i32 1) +; CHECK-NEXT: tail call void @free(ptr nonnull [[ALLOC_I]]) +; CHECK-NEXT: ret i32 0 +; entry: %hdl = call ptr @f(i32 4) br label %while @@ -61,11 +93,6 @@ call void @llvm.coro.destroy(ptr %hdl) ret i32 0 -; CHECK: call void @print(i32 4) -; CHECK: call void @print(i32 3) -; CHECK: call void @print(i32 2) -; CHECK: call void @print(i32 1) -; CHECK: ret i32 0 } declare i1 @llvm.coro.done(ptr) diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -passes="ipsccp<func-spec>,inline,instcombine" -force-specialization -funcspec-max-iters=2 -S < %s | FileCheck %s --check-prefix=ITERS2 ; RUN: opt -passes="ipsccp<func-spec>,inline,instcombine" -force-specialization -funcspec-max-iters=3 -S < %s | FileCheck %s --check-prefix=ITERS3 ; RUN: opt -passes="ipsccp<func-spec>,inline,instcombine" -force-specialization -funcspec-max-iters=4 -S < %s | FileCheck %s --check-prefix=ITERS4 @@ -6,6 +7,44 @@ @high = internal constant i32 6, align 4 define internal void @recursiveFunc(ptr nocapture readonly %lo, i32 %step, ptr nocapture readonly %hi) { +; ITERS2-LABEL: define internal void @recursiveFunc +; ITERS2-SAME: (ptr nocapture readonly [[LO:%.*]], i32 [[STEP:%.*]], ptr 
nocapture readonly [[HI:%.*]]) { +; ITERS2-NEXT: [[LO_TEMP:%.*]] = alloca i32, align 4 +; ITERS2-NEXT: [[HI_TEMP:%.*]] = alloca i32, align 4 +; ITERS2-NEXT: [[LO_LOAD:%.*]] = load i32, ptr [[LO]], align 4 +; ITERS2-NEXT: [[HI_LOAD:%.*]] = load i32, ptr [[HI]], align 4 +; ITERS2-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[LO_LOAD]], [[HI_LOAD]] +; ITERS2-NEXT: br i1 [[CMP_NOT]], label [[RET_BLOCK:%.*]], label [[BLOCK6:%.*]] +; ITERS2: block6: +; ITERS2-NEXT: call void @print_val(i32 [[LO_LOAD]], i32 [[HI_LOAD]]) +; ITERS2-NEXT: [[ADD:%.*]] = add nsw i32 [[LO_LOAD]], 1 +; ITERS2-NEXT: [[SUB:%.*]] = add nsw i32 [[HI_LOAD]], -1 +; ITERS2-NEXT: store i32 [[ADD]], ptr [[LO_TEMP]], align 4 +; ITERS2-NEXT: store i32 [[SUB]], ptr [[HI_TEMP]], align 4 +; ITERS2-NEXT: call void @recursiveFunc(ptr nonnull [[LO_TEMP]], i32 1, ptr nonnull [[HI_TEMP]]) +; ITERS2-NEXT: br label [[RET_BLOCK]] +; ITERS2: ret.block: +; ITERS2-NEXT: ret void +; +; ITERS3-LABEL: define internal void @recursiveFunc +; ITERS3-SAME: (ptr nocapture readonly [[LO:%.*]], i32 [[STEP:%.*]], ptr nocapture readonly [[HI:%.*]]) { +; ITERS3-NEXT: [[LO_TEMP:%.*]] = alloca i32, align 4 +; ITERS3-NEXT: [[HI_TEMP:%.*]] = alloca i32, align 4 +; ITERS3-NEXT: [[LO_LOAD:%.*]] = load i32, ptr [[LO]], align 4 +; ITERS3-NEXT: [[HI_LOAD:%.*]] = load i32, ptr [[HI]], align 4 +; ITERS3-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[LO_LOAD]], [[HI_LOAD]] +; ITERS3-NEXT: br i1 [[CMP_NOT]], label [[RET_BLOCK:%.*]], label [[BLOCK6:%.*]] +; ITERS3: block6: +; ITERS3-NEXT: call void @print_val(i32 [[LO_LOAD]], i32 [[HI_LOAD]]) +; ITERS3-NEXT: [[ADD:%.*]] = add nsw i32 [[LO_LOAD]], 1 +; ITERS3-NEXT: [[SUB:%.*]] = add nsw i32 [[HI_LOAD]], -1 +; ITERS3-NEXT: store i32 [[ADD]], ptr [[LO_TEMP]], align 4 +; ITERS3-NEXT: store i32 [[SUB]], ptr [[HI_TEMP]], align 4 +; ITERS3-NEXT: call void @recursiveFunc(ptr nonnull [[LO_TEMP]], i32 1, ptr nonnull [[HI_TEMP]]) +; ITERS3-NEXT: br label [[RET_BLOCK]] +; ITERS3: ret.block: +; ITERS3-NEXT: ret void +; 
%lo.temp = alloca i32, align 4 %hi.temp = alloca i32, align 4 %lo.load = load i32, ptr %lo, align 4 @@ -26,27 +65,21 @@ ret void } -; ITERS2: @funcspec.arg.4 = internal constant i32 2 -; ITERS2: @funcspec.arg.5 = internal constant i32 4 - -; ITERS3: @funcspec.arg.7 = internal constant i32 3 -; ITERS3: @funcspec.arg.8 = internal constant i32 3 - define i32 @main() { -; ITERS2-LABEL: @main( +; ITERS2-LABEL: define i32 @main() { ; ITERS2-NEXT: call void @print_val(i32 0, i32 6) ; ITERS2-NEXT: call void @print_val(i32 1, i32 5) ; ITERS2-NEXT: call void @recursiveFunc(ptr nonnull @funcspec.arg.4, i32 1, ptr nonnull @funcspec.arg.5) ; ITERS2-NEXT: ret i32 0 ; -; ITERS3-LABEL: @main( +; ITERS3-LABEL: define i32 @main() { ; ITERS3-NEXT: call void @print_val(i32 0, i32 6) ; ITERS3-NEXT: call void @print_val(i32 1, i32 5) ; ITERS3-NEXT: call void @print_val(i32 2, i32 4) ; ITERS3-NEXT: call void @recursiveFunc(ptr nonnull @funcspec.arg.7, i32 1, ptr nonnull @funcspec.arg.8) ; ITERS3-NEXT: ret i32 0 ; -; ITERS4-LABEL: @main( +; ITERS4-LABEL: define i32 @main() { ; ITERS4-NEXT: call void @print_val(i32 0, i32 6) ; ITERS4-NEXT: call void @print_val(i32 1, i32 5) ; ITERS4-NEXT: call void @print_val(i32 2, i32 4) diff --git a/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll b/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll --- a/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll +++ b/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll @@ -8,6 +8,8 @@ define void @f(ptr %p, ptr %q, ptr %z) { ; CHECK-LABEL: define void @f ; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[Z:%.*]]) { +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[P]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[Z]]) ] ; CHECK-NEXT: call void @h(ptr [[P]], ptr [[Q]], ptr [[Z]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/Inline/cgscc-cycle.ll b/llvm/test/Transforms/Inline/cgscc-cycle.ll --- 
a/llvm/test/Transforms/Inline/cgscc-cycle.ll +++ b/llvm/test/Transforms/Inline/cgscc-cycle.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; This test contains extremely tricky call graph structures for the inliner to ; handle correctly. They form cycles where the inliner introduces code that is ; immediately or can eventually be transformed back into the original code. And @@ -11,28 +12,42 @@ ; The `test1_*` collection of functions form a directly cycling pattern. define void @test1_a(ptr %ptr) { -; CHECK-LABEL: define void @test1_a( +; CHECK-LABEL: define void @test1_a +; CHECK-SAME: (ptr [[PTR:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @test1_b(ptr nonnull @test1_b, i1 false, i32 1) +; CHECK-NEXT: ret void +; entry: call void @test1_b(ptr @test1_b, i1 false, i32 0) ; Inlining and simplifying this call will reliably produce the exact same call, ; over and over again. However, each inlining increments the count, and so we ; expect this test case to stop after one round of inlining with a final ; argument of '1'. 
-; CHECK-NOT: call -; CHECK: call void @test1_b(ptr nonnull @test1_b, i1 false, i32 1) -; CHECK-NOT: call ret void } define void @test1_b(ptr %arg, i1 %flag, i32 %inline_count) { -; CHECK-LABEL: define void @test1_b( +; CHECK-LABEL: define void @test1_b +; CHECK-SAME: (ptr [[ARG:%.*]], i1 [[FLAG:%.*]], i32 [[INLINE_COUNT:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: store ptr [[ARG]], ptr [[A]], align 8 +; CHECK-NEXT: br i1 [[FLAG]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: call void @test1_a(ptr nonnull [[A]]) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[A]], align 8 +; CHECK-NEXT: [[INLINE_COUNT_INC:%.*]] = add i32 [[INLINE_COUNT]], 1 +; CHECK-NEXT: call void [[P]](ptr [[ARG]], i1 [[FLAG]], i32 [[INLINE_COUNT_INC]]) +; CHECK-NEXT: ret void +; entry: %a = alloca ptr store ptr %arg, ptr %a ; This alloca and store should remain through any optimization. -; CHECK: %[[A:.*]] = alloca -; CHECK: store ptr %arg, ptr %[[A]] br i1 %flag, label %bb1, label %bb2 @@ -45,14 +60,17 @@ %inline_count_inc = add i32 %inline_count, 1 call void %p(ptr %arg, i1 %flag, i32 %inline_count_inc) ; And we should continue to load and call indirectly through optimization. -; CHECK: %[[P:.*]] = load ptr, ptr %[[A]] -; CHECK: call void %[[P]]( ret void } define void @test2_a(ptr %ptr) { -; CHECK-LABEL: define void @test2_a( +; CHECK-LABEL: define void @test2_a +; CHECK-SAME: (ptr [[PTR:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @test2_b(ptr nonnull @test2_b, ptr nonnull @test2_c, i1 false, i32 2) +; CHECK-NEXT: ret void +; entry: call void @test2_b(ptr @test2_b, ptr @test2_c, i1 false, i32 0) ; Inlining and simplifying this call will reliably produce the exact same call, @@ -60,20 +78,29 @@ ; @test2_c. 
We check the exact number of inlining rounds before we cut off to ; break the cycle by inspecting the last paramater that gets incremented with ; each inlined function body. -; CHECK-NOT: call -; CHECK: call void @test2_b(ptr nonnull @test2_b, ptr nonnull @test2_c, i1 false, i32 2) -; CHECK-NOT: call ret void } define void @test2_b(ptr %arg1, ptr %arg2, i1 %flag, i32 %inline_count) { -; CHECK-LABEL: define void @test2_b( +; CHECK-LABEL: define void @test2_b +; CHECK-SAME: (ptr [[ARG1:%.*]], ptr [[ARG2:%.*]], i1 [[FLAG:%.*]], i32 [[INLINE_COUNT:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: store ptr [[ARG2]], ptr [[A]], align 8 +; CHECK-NEXT: br i1 [[FLAG]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: call void @test2_a(ptr nonnull [[A]]) #[[ATTR0]] +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[A]], align 8 +; CHECK-NEXT: [[INLINE_COUNT_INC:%.*]] = add i32 [[INLINE_COUNT]], 1 +; CHECK-NEXT: call void [[P]](ptr [[ARG1]], ptr [[ARG2]], i1 [[FLAG]], i32 [[INLINE_COUNT_INC]]) +; CHECK-NEXT: ret void +; entry: %a = alloca ptr store ptr %arg2, ptr %a ; This alloca and store should remain through any optimization. -; CHECK: %[[A:.*]] = alloca -; CHECK: store ptr %arg2, ptr %[[A]] br i1 %flag, label %bb1, label %bb2 @@ -86,20 +113,30 @@ %inline_count_inc = add i32 %inline_count, 1 call void %p(ptr %arg1, ptr %arg2, i1 %flag, i32 %inline_count_inc) ; And we should continue to load and call indirectly through optimization. 
-; CHECK: %[[P:.*]] = load ptr, ptr %[[A]] -; CHECK: call void %[[P]]( ret void } define void @test2_c(ptr %arg1, ptr %arg2, i1 %flag, i32 %inline_count) { -; CHECK-LABEL: define void @test2_c( +; CHECK-LABEL: define void @test2_c +; CHECK-SAME: (ptr [[ARG1:%.*]], ptr [[ARG2:%.*]], i1 [[FLAG:%.*]], i32 [[INLINE_COUNT:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: store ptr [[ARG1]], ptr [[A]], align 8 +; CHECK-NEXT: br i1 [[FLAG]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: call void @test2_a(ptr nonnull [[A]]) #[[ATTR0]] +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[A]], align 8 +; CHECK-NEXT: [[INLINE_COUNT_INC:%.*]] = add i32 [[INLINE_COUNT]], 1 +; CHECK-NEXT: call void [[P]](ptr [[ARG1]], ptr [[ARG2]], i1 [[FLAG]], i32 [[INLINE_COUNT_INC]]) +; CHECK-NEXT: ret void +; entry: %a = alloca ptr store ptr %arg1, ptr %a ; This alloca and store should remain through any optimization. -; CHECK: %[[A:.*]] = alloca -; CHECK: store ptr %arg1, ptr %[[A]] br i1 %flag, label %bb1, label %bb2 @@ -112,8 +149,6 @@ %inline_count_inc = add i32 %inline_count, 1 call void %p(ptr %arg1, ptr %arg2, i1 %flag, i32 %inline_count_inc) ; And we should continue to load and call indirectly through optimization. -; CHECK: %[[P:.*]] = load ptr, ptr %[[A]] -; CHECK: call void %[[P]]( ret void } @@ -148,19 +183,21 @@ @b = global i64 0 ; Check test3_c is inlined into test3_a once and only once. 
-; CHECK-LABEL: @test3_a( -; CHECK: tail call void @test3_b() -; CHECK-NEXT: tail call void @test3_d(i32 5) -; CHECK-NEXT: %[[LD1:.*]] = load i64, ptr @a -; CHECK-NEXT: %[[ADD1:.*]] = add nsw i64 %[[LD1]], 1 -; CHECK-NEXT: store i64 %[[ADD1]], ptr @a -; CHECK-NEXT: %[[LD2:.*]] = load i64, ptr @b -; CHECK-NEXT: %[[ADD2:.*]] = add nsw i64 %[[LD2]], 5 -; CHECK-NEXT: store i64 %[[ADD2]], ptr @b -; CHECK-NEXT: ret void - ; Function Attrs: noinline define void @test3_a() #0 { +; CHECK-LABEL: define void @test3_a +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: tail call void @test3_b() +; CHECK-NEXT: tail call void @test3_d(i32 5) +; CHECK-NEXT: [[T6_I:%.*]] = load i64, ptr @a, align 8 +; CHECK-NEXT: [[ADD85_I:%.*]] = add nsw i64 [[T6_I]], 1 +; CHECK-NEXT: store i64 [[ADD85_I]], ptr @a, align 8 +; CHECK-NEXT: [[T0:%.*]] = load i64, ptr @b, align 8 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[T0]], 5 +; CHECK-NEXT: store i64 [[ADD]], ptr @b, align 8 +; CHECK-NEXT: ret void +; entry: tail call void @test3_b() tail call void @test3_c(i32 5) @@ -172,6 +209,15 @@ ; Function Attrs: noinline define void @test3_b() #0 { +; CHECK-LABEL: define void @test3_b +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: tail call void @test3_a() +; CHECK-NEXT: [[T0:%.*]] = load i64, ptr @a, align 8 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[T0]], 2 +; CHECK-NEXT: store i64 [[ADD]], ptr @a, align 8 +; CHECK-NEXT: ret void +; entry: tail call void @test3_a() %t0 = load i64, ptr @a @@ -181,6 +227,25 @@ } define void @test3_d(i32 %i) { +; CHECK-LABEL: define void @test3_d +; CHECK-SAME: (i32 [[I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I]], 5 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @random() +; CHECK-NEXT: [[T0:%.*]] = load i64, ptr @a, align 8 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[T0]], [[CALL]] +; CHECK-NEXT: store i64 [[ADD]], 
ptr @a, align 8 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: tail call void @test3_c(i32 [[I]]) +; CHECK-NEXT: tail call void @test3_b() +; CHECK-NEXT: [[T6:%.*]] = load i64, ptr @a, align 8 +; CHECK-NEXT: [[ADD79:%.*]] = add nsw i64 [[T6]], 3 +; CHECK-NEXT: store i64 [[ADD79]], ptr @a, align 8 +; CHECK-NEXT: ret void +; entry: %cmp = icmp eq i32 %i, 5 br i1 %cmp, label %if.end, label %if.then @@ -202,6 +267,24 @@ } define void @test3_c(i32 %i) { +; CHECK-LABEL: define void @test3_c +; CHECK-SAME: (i32 [[I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I]], 5 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[CALL:%.*]] = tail call i64 @random() +; CHECK-NEXT: [[T0:%.*]] = load i64, ptr @a, align 8 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[T0]], [[CALL]] +; CHECK-NEXT: store i64 [[ADD]], ptr @a, align 8 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: tail call void @test3_d(i32 [[I]]) +; CHECK-NEXT: [[T6:%.*]] = load i64, ptr @a, align 8 +; CHECK-NEXT: [[ADD85:%.*]] = add nsw i64 [[T6]], 1 +; CHECK-NEXT: store i64 [[ADD85]], ptr @a, align 8 +; CHECK-NEXT: ret void +; entry: %cmp = icmp eq i32 %i, 5 br i1 %cmp, label %if.end, label %if.then diff --git a/llvm/test/Transforms/Inline/noalias-calls2.ll b/llvm/test/Transforms/Inline/noalias-calls2.ll --- a/llvm/test/Transforms/Inline/noalias-calls2.ll +++ b/llvm/test/Transforms/Inline/noalias-calls2.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature -; RUN: opt -passes=inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes=inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -8,37 +7,6 @@ declare void @llvm.experimental.noalias.scope.decl(metadata) #0 define void 
@caller_equals_callee(ptr noalias %p0, ptr noalias %p1, i32 %cnt) { -; CHECK-LABEL: define {{[^@]+}}@caller_equals_callee -; CHECK-SAME: (ptr noalias [[P0:%.*]], ptr noalias [[P1:%.*]], i32 [[CNT:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[P0]], i64 2 -; CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 2 -; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata !0) -; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata !3) -; CHECK-NEXT: store i32 10, ptr [[ADD_PTR]], align 4, !alias.scope !0, !noalias !3 -; CHECK-NEXT: store i32 20, ptr [[ADD_PTR1]], align 4, !alias.scope !3, !noalias !0 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CNT]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: store i32 11, ptr [[P0]], align 4 -; CHECK-NEXT: br label [[IF_END:%.*]] -; CHECK: if.else: -; CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 1 -; CHECK-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i32, ptr [[P0]], i64 1 -; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !5) -; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !8) -; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[ADD_PTR2]], i64 2 -; CHECK-NEXT: [[ADD_PTR1_I:%.*]] = getelementptr inbounds i32, ptr [[ADD_PTR3]], i64 2 -; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata !10) -; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata !13) -; CHECK-NEXT: store i32 10, ptr [[ADD_PTR_I]], align 4, !alias.scope !15, !noalias !16 -; CHECK-NEXT: store i32 20, ptr [[ADD_PTR1_I]], align 4, !alias.scope !16, !noalias !15 -; CHECK-NEXT: store i32 11, ptr [[ADD_PTR2]], align 4, !alias.scope !5, !noalias !8 -; CHECK-NEXT: store i32 12, ptr [[P1]], align 4 -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: 
ret void -; entry: %add.ptr = getelementptr inbounds i32, ptr %p0, i64 2 %add.ptr1 = getelementptr inbounds i32, ptr %p1, i64 2 @@ -65,42 +33,6 @@ } define void @test01(ptr noalias %p0, ptr noalias %p1, i32 %cnt) { -; CHECK-LABEL: define {{[^@]+}}@test01 -; CHECK-SAME: (ptr noalias [[P0:%.*]], ptr noalias [[P1:%.*]], i32 [[CNT:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 13, ptr [[P0]], align 4 -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[P0]], i64 1 -; CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 1 -; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !17) -; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !20) -; CHECK-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[ADD_PTR]], i64 2 -; CHECK-NEXT: [[ADD_PTR1_I:%.*]] = getelementptr inbounds i32, ptr [[ADD_PTR1]], i64 2 -; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !22) -; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !25) -; CHECK-NEXT: store i32 10, ptr [[ADD_PTR_I]], align 4, !alias.scope !27, !noalias !28 -; CHECK-NEXT: store i32 20, ptr [[ADD_PTR1_I]], align 4, !alias.scope !28, !noalias !27 -; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[CNT]], 0 -; CHECK-NEXT: br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I:%.*]] -; CHECK: if.then.i: -; CHECK-NEXT: store i32 11, ptr [[ADD_PTR]], align 4, !alias.scope !17, !noalias !20 -; CHECK-NEXT: br label [[CALLER_EQUALS_CALLEE_EXIT:%.*]] -; CHECK: if.else.i: -; CHECK-NEXT: [[ADD_PTR2_I:%.*]] = getelementptr inbounds i32, ptr [[ADD_PTR1]], i64 1 -; CHECK-NEXT: [[ADD_PTR3_I:%.*]] = getelementptr inbounds i32, ptr [[ADD_PTR]], i64 1 -; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !29) -; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !32) -; CHECK-NEXT: [[ADD_PTR_I_I:%.*]] = getelementptr inbounds i32, ptr [[ADD_PTR2_I]], i64 2 -; CHECK-NEXT: 
[[ADD_PTR1_I_I:%.*]] = getelementptr inbounds i32, ptr [[ADD_PTR3_I]], i64 2 -; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !34) -; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !37) -; CHECK-NEXT: store i32 10, ptr [[ADD_PTR_I_I]], align 4, !alias.scope !39, !noalias !40 -; CHECK-NEXT: store i32 20, ptr [[ADD_PTR1_I_I]], align 4, !alias.scope !40, !noalias !39 -; CHECK-NEXT: store i32 11, ptr [[ADD_PTR2_I]], align 4, !alias.scope !41, !noalias !42 -; CHECK-NEXT: store i32 12, ptr [[ADD_PTR1]], align 4, !alias.scope !20, !noalias !17 -; CHECK-NEXT: br label [[CALLER_EQUALS_CALLEE_EXIT]] -; CHECK: caller_equals_callee.exit: -; CHECK-NEXT: ret void -; entry: store i32 13, ptr %p0, align 4 %add.ptr = getelementptr inbounds i32, ptr %p0, i64 1 @@ -116,47 +48,3 @@ !2 = distinct !{!2, !"do_store"} !3 = !{!4} !4 = distinct !{!4, !2, !"do_store: %p1"} - -; CHECK: !0 = !{!1} -; CHECK: !1 = distinct !{!1, !2, !"do_store: %p0"} -; CHECK: !2 = distinct !{!2, !"do_store"} -; CHECK: !3 = !{!4} -; CHECK: !4 = distinct !{!4, !2, !"do_store: %p1"} -; CHECK: !5 = !{!6} -; CHECK: !6 = distinct !{!6, !7, !"caller_equals_callee: %p0"} -; CHECK: !7 = distinct !{!7, !"caller_equals_callee"} -; CHECK: !8 = !{!9} -; CHECK: !9 = distinct !{!9, !7, !"caller_equals_callee: %p1"} -; CHECK: !10 = !{!11} -; CHECK: !11 = distinct !{!11, !12, !"do_store: %p0"} -; CHECK: !12 = distinct !{!12, !"do_store"} -; CHECK: !13 = !{!14} -; CHECK: !14 = distinct !{!14, !12, !"do_store: %p1"} -; CHECK: !15 = !{!11, !6} -; CHECK: !16 = !{!14, !9} -; CHECK: !17 = !{!18} -; CHECK: !18 = distinct !{!18, !19, !"caller_equals_callee: %p0"} -; CHECK: !19 = distinct !{!19, !"caller_equals_callee"} -; CHECK: !20 = !{!21} -; CHECK: !21 = distinct !{!21, !19, !"caller_equals_callee: %p1"} -; CHECK: !22 = !{!23} -; CHECK: !23 = distinct !{!23, !24, !"do_store: %p0"} -; CHECK: !24 = distinct !{!24, !"do_store"} -; CHECK: !25 = !{!26} -; CHECK: !26 = distinct !{!26, !24, 
!"do_store: %p1"} -; CHECK: !27 = !{!23, !18} -; CHECK: !28 = !{!26, !21} -; CHECK: !29 = !{!30} -; CHECK: !30 = distinct !{!30, !31, !"caller_equals_callee: %p0"} -; CHECK: !31 = distinct !{!31, !"caller_equals_callee"} -; CHECK: !32 = !{!33} -; CHECK: !33 = distinct !{!33, !31, !"caller_equals_callee: %p1"} -; CHECK: !34 = !{!35} -; CHECK: !35 = distinct !{!35, !36, !"do_store: %p0"} -; CHECK: !36 = distinct !{!36, !"do_store"} -; CHECK: !37 = !{!38} -; CHECK: !38 = distinct !{!38, !36, !"do_store: %p1"} -; CHECK: !39 = !{!35, !30, !21} -; CHECK: !40 = !{!38, !33, !18} -; CHECK: !41 = !{!30, !21} -; CHECK: !42 = !{!33, !18} diff --git a/llvm/test/Transforms/Inline/nonnull.ll b/llvm/test/Transforms/Inline/nonnull.ll --- a/llvm/test/Transforms/Inline/nonnull.ll +++ b/llvm/test/Transforms/Inline/nonnull.ll @@ -54,6 +54,7 @@ define void @caller(ptr nonnull %arg) { ; CHECK-LABEL: define void @caller ; CHECK-SAME: (ptr nonnull [[ARG:%.*]]) { +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[ARG]]) ] ; CHECK-NEXT: call void @bar() ; CHECK-NEXT: ret void ; @@ -75,6 +76,7 @@ define void @caller3(ptr %arg) { ; CHECK-LABEL: define void @caller3 ; CHECK-SAME: (ptr [[ARG:%.*]]) { +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[ARG]]) ] ; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq ptr [[ARG]], null ; CHECK-NEXT: br i1 [[CMP_I]], label [[EXPENSIVE_I:%.*]], label [[DONE_I:%.*]] ; CHECK: expensive.i: @@ -99,10 +101,11 @@ ret void } +; Positive test - arg is known non null define void @caller4(ptr dereferenceable(8) %arg) { ; CHECK-LABEL: define void @caller4 ; CHECK-SAME: (ptr dereferenceable(8) [[ARG:%.*]]) { -; CHECK-NEXT: call void @callee(ptr dereferenceable(8) [[ARG]]) +; CHECK-NEXT: call void @bar() ; CHECK-NEXT: ret void ; call void @callee(ptr dereferenceable(8) %arg) @@ -122,7 +125,24 @@ define void @caller6(ptr %arg) { ; CHECK-LABEL: define void @caller6 ; CHECK-SAME: (ptr [[ARG:%.*]]) { -; CHECK-NEXT: call void @callee(ptr dereferenceable(8) 
[[ARG]]) +; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq ptr [[ARG]], null +; CHECK-NEXT: br i1 [[CMP_I]], label [[EXPENSIVE_I:%.*]], label [[DONE_I:%.*]] +; CHECK: expensive.i: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[CALLEE_EXIT:%.*]] +; CHECK: done.i: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[CALLEE_EXIT]] +; CHECK: callee.exit: ; CHECK-NEXT: ret void ; call void @callee(ptr dereferenceable(8) %arg) diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll @@ -76,7 +76,7 @@ ; O2: for.cond.cleanup3: ; O2-NEXT: [[INC7]] = add nuw nsw i64 [[I_06]], 1 ; O2-NEXT: [[EXITCOND7_NOT:%.*]] = icmp eq i64 [[INC7]], 100 -; O2-NEXT: br i1 [[EXITCOND7_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP7:![0-9]+]] +; O2-NEXT: br i1 [[EXITCOND7_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP8:![0-9]+]] ; O2: for.body4: ; O2-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], [[FOR_BODY4]] ], [ [[J_05_PH]], [[FOR_BODY4_PREHEADER9]] ] ; O2-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[J_05]] @@ -85,7 +85,7 @@ ; O2-NEXT: store i32 [[INC]], ptr [[ADD_PTR_I]], align 4, !tbaa [[TBAA0]] ; O2-NEXT: [[INC5]] = add nuw i64 [[J_05]], 1 ; O2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5]], [[NUMELEMS]] -; O2-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]], !llvm.loop [[LOOP8:![0-9]+]] +; O2-NEXT: br i1 [[EXITCOND_NOT]], label 
[[FOR_COND_CLEANUP3]], label [[FOR_BODY4]], !llvm.loop [[LOOP9:![0-9]+]] ; ; O3-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy ; O3-SAME: (ptr nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { @@ -127,7 +127,7 @@ ; O3-NEXT: store i32 [[INC_US]], ptr [[ADD_PTR_I_US]], align 4, !tbaa [[TBAA0]] ; O3-NEXT: [[INC5_US]] = add nuw i64 [[J_05_US]], 1 ; O3-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5_US]], [[NUMELEMS]] -; O3-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US]], !llvm.loop [[LOOP7:![0-9]+]] +; O3-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US]], !llvm.loop [[LOOP8:![0-9]+]] ; O3: for.cond1.for.cond.cleanup3_crit_edge.us: ; O3-NEXT: [[INC7_US]] = add nuw nsw i64 [[I_06_US]], 1 ; O3-NEXT: [[EXITCOND8_NOT:%.*]] = icmp eq i64 [[INC7_US]], 100 diff --git a/llvm/test/Transforms/PhaseOrdering/dae-dce.ll b/llvm/test/Transforms/PhaseOrdering/dae-dce.ll --- a/llvm/test/Transforms/PhaseOrdering/dae-dce.ll +++ b/llvm/test/Transforms/PhaseOrdering/dae-dce.ll @@ -14,9 +14,14 @@ } define internal void @capture_and_trap(ptr %ptr) noinline { -; CHECK-LABEL: @capture_and_trap( -; CHECK-NEXT: tail call void @llvm.trap() -; CHECK-NEXT: unreachable +; DEFAULT-LABEL: @capture_and_trap( +; DEFAULT-NEXT: tail call void @llvm.trap() +; DEFAULT-NEXT: unreachable +; +; LTO-LABEL: @capture_and_trap( +; LTO-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr poison) ] +; LTO-NEXT: tail call void @llvm.trap() +; LTO-NEXT: unreachable ; %alloca = alloca ptr, align 4 store ptr %ptr, ptr %alloca, align 4 @@ -47,6 +52,3 @@ call void @capture_and_trap(ptr @dead_fn2) unreachable } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; DEFAULT: {{.*}} -; LTO: {{.*}}