Index: llvm/lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/InlineFunction.cpp +++ llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -83,7 +83,7 @@ static cl::opt UseNoAliasIntrinsic( "use-noalias-intrinsic-during-inlining", cl::Hidden, cl::ZeroOrMore, - cl::init(false), + cl::init(true), cl::desc("Use the llvm.noalias intrinsic during inlining.")); // Disabled by default, because the added alignment assumptions may increase Index: llvm/test/Transforms/Coroutines/ArgAddr.ll =================================================================== --- llvm/test/Transforms/Coroutines/ArgAddr.ll +++ llvm/test/Transforms/Coroutines/ArgAddr.ll @@ -47,6 +47,7 @@ ret i32 0 ; CHECK: call void @ctor ; CHECK-NEXT: call void @print(i32 4) +; CHECK-NEXT: bitcast i8* ; CHECK-NEXT: call void @print(i32 3) ; CHECK-NEXT: call void @print(i32 2) ; CHECK: ret i32 0 Index: llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll +++ llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll @@ -65,8 +65,12 @@ ; CHECK-LABEL: define i32 @main ; CHECK-NEXT: entry: ; CHECK: [[BUFFER:%.*]] = alloca [8 x i8], align 4 +; CHECK: [[SUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[BUFFER]], i64 0, i64 0 ; CHECK: [[SLOT:%.*]] = bitcast [8 x i8]* [[BUFFER]] to i32* -; CHECK-NEXT: store i32 7, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_0_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_0_META:!.*]]) +; CHECK-NEXT: [[SUB_0_PROVENANCE:%.*]] = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_0_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_0_META]]) +; CHECK-NEXT: [[SUB_0_PROVENANCE_I32:%.*]] = bitcast i8* [[SUB_0_PROVENANCE]] to i32* +; CHECK-NEXT: store i32 7, i32* [[SLOT]], ptr_provenance i32* [[SUB_0_PROVENANCE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: call void @print(i32 7) ; CHECK-NEXT: ret i32 0 Index: llvm/test/Transforms/Coroutines/coro-retcon-value.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-retcon-value.ll +++ llvm/test/Transforms/Coroutines/coro-retcon-value.ll @@ -75,16 +75,23 @@ ; CHECK-LABEL: define i32 @main ; CHECK-NEXT: entry: ; CHECK: [[BUFFER:%.*]] = alloca [8 x i8], align 4 +; CHECK: [[SUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[BUFFER]], i64 0, i64 0 ; CHECK: [[SLOT:%.*]] = bitcast [8 x i8]* [[BUFFER]] to i32* ; CHECK-NEXT: store i32 4, i32* [[SLOT]], align 4 ; CHECK-NEXT: call void @print(i32 4) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_0_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_0_META:!.*]]) +; CHECK-NEXT: [[SUB_0_PROVENANCE:%.*]] = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_0_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_0_META]]) +; CHECK-NEXT: [[SUB_0_PROVENANCE_I32:%.*]] = bitcast i8* [[SUB_0_PROVENANCE]] to i32* +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], ptr_provenance i32* [[SUB_0_PROVENANCE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], ptr_provenance i32* [[SUB_0_PROVENANCE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: call void @print(i32 [[INC]]) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_1_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_1_META:!.*]]) +; CHECK-NEXT: [[SUB_1_PROVENANCE:%.*]] = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_1_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_1_META]]) +; CHECK-NEXT: [[SUB_1_PROVENANCE_I32:%.*]] = bitcast i8* [[SUB_1_PROVENANCE]] to i32* +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], ptr_provenance i32* [[SUB_1_PROVENANCE_I32]], align 4, !noalias [[SUB_1_META]] ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], ptr_provenance i32* [[SUB_1_PROVENANCE_I32]], align 4, !noalias [[SUB_1_META]] ; CHECK-NEXT: call void @print(i32 [[INC]]) ; CHECK-NEXT: ret i32 0 Index: llvm/test/Transforms/Coroutines/coro-retcon.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-retcon.ll +++ llvm/test/Transforms/Coroutines/coro-retcon.ll @@ -65,16 +65,23 @@ ; CHECK-LABEL: define i32 @main ; CHECK-NEXT: entry: ; CHECK: [[BUFFER:%.*]] = alloca [8 x i8], align 4 +; CHECK: [[SUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[BUFFER]], i64 0, i64 0 ; CHECK: [[SLOT:%.*]] = bitcast [8 x i8]* [[BUFFER]] to i32* ; CHECK-NEXT: store i32 4, i32* [[SLOT]], align 4 ; CHECK-NEXT: call void @print(i32 4) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_0_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_0_META:!.*]]) +; CHECK-NEXT: [[SUB_0_PROVENANCE:%.*]] = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_0_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_0_META]]) +; CHECK-NEXT: [[SUB_0_PROVENANCE_I32:%.*]] = bitcast i8* [[SUB_0_PROVENANCE]] to i32* +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], ptr_provenance i32* [[SUB_0_PROVENANCE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], ptr_provenance i32* [[SUB_0_PROVENANCE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: call void @print(i32 [[INC]]) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_1_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_1_META:!.*]]) +; CHECK-NEXT: [[SUB_1_PROVENANCE:%.*]] = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_1_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_1_META]]) +; CHECK-NEXT: [[SUB_1_PROVENANCE_I32:%.*]] = bitcast i8* [[SUB_1_PROVENANCE]] to i32* +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], ptr_provenance i32* [[SUB_1_PROVENANCE_I32]], align 4, !noalias [[SUB_1_META]] ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], ptr_provenance i32* [[SUB_1_PROVENANCE_I32]], align 4, !noalias [[SUB_1_META]] ; CHECK-NEXT: call void @print(i32 [[INC]]) ; CHECK-NEXT: ret i32 0 Index: llvm/test/Transforms/Inline/launder.invariant.group.ll =================================================================== --- llvm/test/Transforms/Inline/launder.invariant.group.ll +++ llvm/test/Transforms/Inline/launder.invariant.group.ll @@ -1,6 +1,10 @@ -; RUN: opt -S -inline < %s | FileCheck %s -; RUN: opt -S -O3 < %s | FileCheck %s -; RUN: opt -S -inline -inline-threshold=1 < %s | FileCheck %s +; RUN: opt -S -inline --use-noalias-intrinsic-during-inlining=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_SCOPED +; RUN: opt -S -O3 --use-noalias-intrinsic-during-inlining=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_SCOPED,CHECK_OPT +; RUN: opt -S -inline -inline-threshold=1 --use-noalias-intrinsic-during-inlining=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_SCOPED + +; RUN: opt -S -inline --use-noalias-intrinsic-during-inlining=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_NOALIAS +; RUN: opt -S -O3 --use-noalias-intrinsic-during-inlining=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_PROVENANCE,CHECK_OPT +; RUN: opt -S -inline -inline-threshold=1 --use-noalias-intrinsic-during-inlining=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_NOALIAS %struct.A = type <{ i32 (...)**, i32, [4 x i8] }> @@ -9,7 +13,7 @@ ; sometimes it would be considered noalias. ; CHECK-LABEL: define i32 @bar(%struct.A* noalias define i32 @bar(%struct.A* noalias) { -; CHECK-NOT: noalias +; CHECK_SCOPED-NOT: noalias %2 = bitcast %struct.A* %0 to i8* %3 = call i8* @llvm.launder.invariant.group.p0i8(i8* %2) %4 = getelementptr inbounds i8, i8* %3, i64 8 @@ -22,10 +26,24 @@ ; CHECK-LABEL: define i32 @foo(%struct.A* noalias define i32 @foo(%struct.A* noalias) { - ; CHECK-NOT: call i32 @bar( - ; CHECK-NOT: noalias + ; CHECK_SCOPED-NOT: call i32 @bar( + ; CHECK_SCOPED-NOT: noalias + + ; CHECK_NOALIAS-NOT: call i32 @bar( + ; CHECK_NOALIAS: @llvm.noalias.decl.p0 + ; CHECK_NOALIAS-NEXT: @llvm.noalias.p0 + ; CHECK_NOALIAS-NOT: call i32 @bar( + + ; CHECK_PROVENANCE-NOT: call i32 @bar( + ; CHECK_PROVENANCE: @llvm.noalias.decl.p0 + ; CHECK_PROVENANCE-NEXT: @llvm.provenance.noalias.p0 + ; CHECK_PROVENANCE-NOT: call i32 @bar( + ; CHECK_PROVENANCE: @llvm.noalias.arg.guard.p0 + ; CHECK_PROVENANCE-NOT: call i32 @bar( %2 = tail call i32 @bar(%struct.A* %0) ret i32 %2 + + ; CHECK_OPT: ret i32 42 } Index: llvm/test/Transforms/Inline/parallel-loop-md-merge.ll =================================================================== --- llvm/test/Transforms/Inline/parallel-loop-md-merge.ll +++ llvm/test/Transforms/Inline/parallel-loop-md-merge.ll @@ -1,4 +1,5 @@ -; RUN: opt -always-inline -globalopt -S < %s | FileCheck %s +; RUN: opt -always-inline -globalopt --use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_SCOPE +; RUN: opt -always-inline -globalopt --use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_NOALIAS ; ; static void __attribute__((always_inline)) callee(long n, double A[static const restrict n], long i) { ; for (long j = 0; j < n; j += 1) @@ -64,14 +65,21 @@ !11 = distinct !{!11, !12} ; LoopID !12 = !{!"llvm.loop.parallel_accesses", !10} +; There is a small reordering when noalias intrinsics are introduced ; CHECK: store double 4.200000e+01, {{.*}} !llvm.access.group ![[ACCESS_GROUP_LIST_3:[0-9]+]] ; CHECK: br label %for.cond.i, !llvm.loop ![[LOOP_INNER:[0-9]+]] ; CHECK: br label %for.cond, !llvm.loop ![[LOOP_OUTER:[0-9]+]] -; CHECK: ![[ACCESS_GROUP_LIST_3]] = !{![[ACCESS_GROUP_INNER:[0-9]+]], ![[ACCESS_GROUP_OUTER:[0-9]+]]} -; CHECK: ![[ACCESS_GROUP_INNER]] = distinct !{} -; CHECK: ![[ACCESS_GROUP_OUTER]] = distinct !{} + +; CHECK_SCOPE: ![[ACCESS_GROUP_LIST_3]] = !{![[ACCESS_GROUP_INNER:[0-9]+]], ![[ACCESS_GROUP_OUTER:[0-9]+]]} +; CHECK_SCOPE: ![[ACCESS_GROUP_INNER]] = distinct !{} +; CHECK_SCOPE: ![[ACCESS_GROUP_OUTER]] = distinct !{} + +; CHECK_NOALIAS: ![[ACCESS_GROUP_OUTER:[0-9]+]] = distinct !{} +; CHECK_NOALIAS: ![[ACCESS_GROUP_LIST_3]] = !{![[ACCESS_GROUP_INNER:[0-9]+]], ![[ACCESS_GROUP_OUTER]]} +; CHECK_NOALIAS: ![[ACCESS_GROUP_INNER]] = distinct !{} + ; CHECK: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[ACCESSES_INNER:[0-9]+]]} ; CHECK: ![[ACCESSES_INNER]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_INNER]]} ; CHECK: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[ACCESSES_OUTER:[0-9]+]]}