diff --git a/llvm/test/Transforms/InferAlignment/addrspace.ll b/llvm/test/Transforms/InferAlignment/addrspace.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAlignment/addrspace.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=no-op-function %s -o - | FileCheck %s
+
+; ------------------------------------------------------------------------------
+; Vector type
+; ------------------------------------------------------------------------------
+
+@v4f_as3 = addrspace(3) global <4 x float> zeroinitializer
+
+define <4 x float> @constant_fold_bitcast_vector_as() {
+; CHECK-LABEL: define <4 x float> @constant_fold_bitcast_vector_as() {
+; CHECK-NEXT:    [[A:%.*]] = load <4 x float>, ptr addrspace(3) @v4f_as3, align 4
+; CHECK-NEXT:    ret <4 x float> [[A]]
+;
+  %a = load <4 x float>, ptr addrspace(3) @v4f_as3, align 4
+  ret <4 x float> %a
+}
+
+; ------------------------------------------------------------------------------
+; Array type
+; ------------------------------------------------------------------------------
+
+@i32_array_as3 = addrspace(3) global [10 x i32] zeroinitializer
+
+define i32 @test_cast_gep_small_indices_as() {
+; CHECK-LABEL: define i32 @test_cast_gep_small_indices_as() {
+; CHECK-NEXT:    [[X:%.*]] = load i32, ptr addrspace(3) @i32_array_as3, align 4
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = load i32, ptr addrspace(3) @i32_array_as3, align 4
+  ret i32 %x
+}
+
+define i32 @test_cast_gep_large_indices_as() {
+; CHECK-LABEL: define i32 @test_cast_gep_large_indices_as() {
+; CHECK-NEXT:    [[X:%.*]] = load i32, ptr addrspace(3) @i32_array_as3, align 4
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %x = load i32, ptr addrspace(3) @i32_array_as3, align 4
+  ret i32 %x
+}
+
+; ------------------------------------------------------------------------------
+; Struct type
+; ------------------------------------------------------------------------------
+
+%struct.foo = type { float, float, [4 x i32], ptr addrspace(3) }
+
+@global_ptr = addrspace(3) global %struct.foo {
+  float 0.0,
+  float 0.0,
+  [4 x i32] zeroinitializer,
+  ptr addrspace(3) @i32_array_as3
+}
+
+define i32 @test_constant_cast_gep_struct_indices_as() {
+; CHECK-LABEL: define i32 @test_constant_cast_gep_struct_indices_as() {
+; CHECK-NEXT:    [[X:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr addrspace(3) @global_ptr, i18 0, i32 2, i12 2
+; CHECK-NEXT:    [[Y:%.*]] = load i32, ptr addrspace(3) [[X]], align 4
+; CHECK-NEXT:    ret i32 [[Y]]
+;
+  %x = getelementptr %struct.foo, ptr addrspace(3) @global_ptr, i18 0, i32 2, i12 2
+  %y = load i32, ptr addrspace(3) %x, align 4
+  ret i32 %y
+}
diff --git a/llvm/test/Transforms/InferAlignment/alloca-cast.ll b/llvm/test/Transforms/InferAlignment/alloca-cast.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAlignment/alloca-cast.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -S -passes=no-op-function | FileCheck %s
+
+; C source:
+; struct Foo {
+;   int x, y;
+; };
+; void escape(const void *p);
+; void f(struct Foo *p) {
+;   struct Foo local;
+;   *(__int64 *)&local = *(__int64 *)p;
+;   escape(&local);
+; }
+
+; ModuleID = '<stdin>'
+source_filename = "t.c"
+target triple = "x86_64-pc-windows-msvc19.11.25508"
+
+%struct.Foo = type { i32, i32 }
+
+define void @f(ptr %p) {
+; CHECK-LABEL: define void @f
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOCAL:%.*]] = alloca [[STRUCT_FOO:%.*]],
align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8 +; CHECK-NEXT: store i64 [[TMP0]], ptr [[LOCAL]], align 4 +; CHECK-NEXT: call void @escape(ptr [[LOCAL]]) +; CHECK-NEXT: ret void +; +entry: + %local = alloca %struct.Foo, align 4 + %0 = load i64, ptr %p, align 8 + store i64 %0, ptr %local, align 4 + call void @escape(ptr %local) + ret void +} + +declare void @escape(ptr) diff --git a/llvm/test/Transforms/InferAlignment/alloca.ll b/llvm/test/Transforms/InferAlignment/alloca.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InferAlignment/alloca.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -passes=no-op-function -S | FileCheck %s +target triple = "i386-apple-darwin9.6" + +%struct.Key = type { { i32, i32 } } + +define i32 @bar(i64 %key_token2) nounwind { +; CHECK-LABEL: define i32 @bar +; CHECK-SAME: (i64 [[KEY_TOKEN2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IOSPEC:%.*]] = alloca [[STRUCT_KEY:%.*]], align 8 +; CHECK-NEXT: [[RET:%.*]] = alloca i32, align 4 +; CHECK-NEXT: %"alloca point" = bitcast i32 0 to i32 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_KEY]], ptr [[IOSPEC]], i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr { i32, i32 }, ptr [[TMP0]], i32 0, i32 0 +; CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr { i32, i32 }, ptr [[TMP0]], i32 0, i32 1 +; CHECK-NEXT: store i32 0, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_KEY]], ptr [[IOSPEC]], i32 0, i32 0 +; CHECK-NEXT: store i64 [[KEY_TOKEN2]], ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 (...) @foo(ptr byval([[STRUCT_KEY]]) align 4 [[IOSPEC]], ptr [[RET]]) #[[ATTR0]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[RET]], align 4 +; CHECK-NEXT: ret i32 [[TMP5]] +; +entry: + %iospec = alloca %struct.Key ; [#uses=3] + %ret = alloca i32 ; [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + %0 = getelementptr %struct.Key, ptr %iospec, i32 0, i32 0 ; [#uses=2] + %1 = getelementptr { i32, i32 }, ptr %0, i32 0, i32 0 ; [#uses=1] + store i32 0, ptr %1, align 4 + %2 = getelementptr { i32, i32 }, ptr %0, i32 0, i32 1 ; [#uses=1] + store i32 0, ptr %2, align 4 + %3 = getelementptr %struct.Key, ptr %iospec, i32 0, i32 0 ; [#uses=1] + store i64 %key_token2, ptr %3, align 4 + %4 = call i32 (...) @foo(ptr byval(%struct.Key) align 4 %iospec, ptr %ret) nounwind ; [#uses=0] + %5 = load i32, ptr %ret, align 4 ; [#uses=1] + ret i32 %5 +} + +declare i32 @foo(...) diff --git a/llvm/test/Transforms/InferAlignment/assume-inevitable.ll b/llvm/test/Transforms/InferAlignment/assume-inevitable.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InferAlignment/assume-inevitable.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -passes=no-op-function -S | FileCheck %s + +; Check that assume is propagated backwards through all +; operations that are `isGuaranteedToTransferExecutionToSuccessor` +; (it should reach the load and mark it as `align 32`). 
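+; As a worked reading of the pattern below (a sketch of the intended
+; inference, not something the no-op RUN line performs): %a is 32-byte
+; aligned exactly when (ptrtoint %a) & 31 == 0, which is what the
+; ptrtoint/and/icmp/assume chain encodes. The same fact could also be
+; expressed directly with an alignment assume operand bundle, e.g.
+;   call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 32) ]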
+define i32 @assume_inevitable(ptr %a, ptr %b, ptr %c) { +; CHECK-LABEL: define i32 @assume_inevitable +; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DUMMY:%.*]] = alloca i8, align 4 +; CHECK-NEXT: [[M:%.*]] = alloca i64, align 8 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: [[LOADRES:%.*]] = load i32, ptr [[B]], align 4 +; CHECK-NEXT: store i32 [[LOADRES]], ptr [[A]], align 4 +; CHECK-NEXT: [[OBJSZ:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[C]], i1 false, i1 false, i1 false) +; CHECK-NEXT: store i64 [[OBJSZ]], ptr [[M]], align 4 +; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 +; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + %dummy = alloca i8, align 4 + %m = alloca i64 + %0 = load i32, ptr %a, align 4 + + %loadres = load i32, ptr %b + store i32 %loadres, ptr %a + + %objsz = call i64 @llvm.objectsize.i64.p0(ptr %c, i1 false) + store i64 %objsz, ptr %m + + %ptrint = ptrtoint ptr %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + + ret i32 %0 +} + +declare i64 @llvm.objectsize.i64.p0(ptr, i1) +declare void @llvm.assume(i1) diff --git a/llvm/test/Transforms/InferAlignment/assume-loop-align.ll b/llvm/test/Transforms/InferAlignment/assume-loop-align.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InferAlignment/assume-loop-align.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -passes=no-op-function -S < %s | FileCheck %s +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @foo(ptr %a, ptr %b) #0 { +; CHECK-LABEL: define void @foo +; CHECK-SAME: (ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63 +; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: [[PTRINT1:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[MASKEDPTR2:%.*]] = and i64 [[PTRINT1]], 63 +; CHECK-NEXT: [[MASKCOND3:%.*]] = icmp eq i64 [[MASKEDPTR2]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND3]]) +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVAR]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVAR]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[INDVAR_NEXT]] = add nuw nsw i64 [[INDVAR]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVAR_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 1648 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %ptrint = ptrtoint ptr %a to i64 + %maskedptr = and i64 %ptrint, 63 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + + %ptrint1 = ptrtoint ptr %b to i64 + %maskedptr2 = 
and i64 %ptrint1, 63 + %maskcond3 = icmp eq i64 %maskedptr2, 0 + tail call void @llvm.assume(i1 %maskcond3) + + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ] + + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvar + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, 1 + %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvar + store i32 %add, ptr %arrayidx2, align 4 + + %indvar.next = add nuw nsw i64 %indvar, 16 + %1 = trunc i64 %indvar.next to i32 + %cmp = icmp slt i32 %1, 1648 + + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + +; Function Attrs: nounwind +declare void @llvm.assume(i1) #1 + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } diff --git a/llvm/test/Transforms/InferAlignment/atomic.ll b/llvm/test/Transforms/InferAlignment/atomic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InferAlignment/atomic.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S < %s -passes=no-op-function | FileCheck %s +target triple = "x86_64-apple-macosx10.7.0" + +; Check transforms involving atomic operations + +; ------------------------------------------------------------------------------ +; load instructions +; ------------------------------------------------------------------------------ + +@c = constant i64 42 + +define i8 @test_atomic_global_load() { +; CHECK-LABEL: define i8 @test_atomic_global_load() { +; CHECK-NEXT: [[V:%.*]] = load atomic i8, ptr @c seq_cst, align 1 +; CHECK-NEXT: ret i8 [[V]] +; + %v = load atomic i8, ptr @c seq_cst, align 1 + ret i8 %v +} + +define i32 @test1() { +; CHECK-LABEL: define i32 @test1() { +; CHECK-NEXT: [[X:%.*]] = load atomic i32, ptr @c unordered, align 4 +; CHECK-NEXT: ret i32 [[X]] +; + %x = load atomic i32, ptr @c unordered, align 4 + ret i32 %x +} + +define i32 @test2() { +; CHECK-LABEL: define i32 @test2() { +; CHECK-NEXT: [[X:%.*]] = load atomic i32, ptr @c monotonic, align 4 +; CHECK-NEXT: ret i32 [[X]] +; + %x = load atomic i32, ptr @c monotonic, align 4 + ret i32 %x +} + +define i32 @test3() { +; CHECK-LABEL: define i32 @test3() { +; CHECK-NEXT: [[X:%.*]] = load atomic i32, ptr @c seq_cst, align 4 +; CHECK-NEXT: ret i32 [[X]] +; + %x = load atomic i32, ptr @c seq_cst, align 4 + ret i32 %x +} + +; ------------------------------------------------------------------------------ +; store instructions +; ------------------------------------------------------------------------------ + +; An unordered access to null is still unreachable. There's no +; ordering imposed. 
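+; (The null variants of these stores are covered in the "store of null"
+; section further below.) For the atomic stores here, note that alignment
+; inference may only ever raise an alignment, never lower it; e.g.
+;   store atomic i32 0, ptr @c seq_cst, align 4
+; already carries the natural 4-byte alignment of i32, so align 4 is also
+; the expected result after inference.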
+define i32 @test4() { +; CHECK-LABEL: define i32 @test4() { +; CHECK-NEXT: store atomic i32 0, ptr @c unordered, align 4 +; CHECK-NEXT: ret i32 0 +; + store atomic i32 0, ptr @c unordered, align 4 + ret i32 0 +} + +define i32 @test5() { +; CHECK-LABEL: define i32 @test5() { +; CHECK-NEXT: store atomic i32 0, ptr @c monotonic, align 4 +; CHECK-NEXT: ret i32 0 +; + store atomic i32 0, ptr @c monotonic, align 4 + ret i32 0 +} + +define i32 @test6() { +; CHECK-LABEL: define i32 @test6() { +; CHECK-NEXT: store atomic i32 0, ptr @c seq_cst, align 4 +; CHECK-NEXT: ret i32 0 +; + store atomic i32 0, ptr @c seq_cst, align 4 + ret i32 0 +} + +; ------------------------------------------------------------------------------ +; load of null +; ------------------------------------------------------------------------------ + +define i32 @test1_null() { +; CHECK-LABEL: define i32 @test1_null() { +; CHECK-NEXT: [[X:%.*]] = load atomic i32, ptr null unordered, align 4 +; CHECK-NEXT: ret i32 [[X]] +; + %x = load atomic i32, ptr null unordered, align 4 + ret i32 %x +} + +define i32 @test2_null() { +; CHECK-LABEL: define i32 @test2_null() { +; CHECK-NEXT: [[X:%.*]] = load atomic i32, ptr null monotonic, align 4 +; CHECK-NEXT: ret i32 [[X]] +; + %x = load atomic i32, ptr null monotonic, align 4 + ret i32 %x +} + +define i32 @test3_null() { +; CHECK-LABEL: define i32 @test3_null() { +; CHECK-NEXT: [[X:%.*]] = load atomic i32, ptr null seq_cst, align 4 +; CHECK-NEXT: ret i32 [[X]] +; + %x = load atomic i32, ptr null seq_cst, align 4 + ret i32 %x +} + +; ------------------------------------------------------------------------------ +; store of null +; ------------------------------------------------------------------------------ + +define i32 @test4_null() { +; CHECK-LABEL: define i32 @test4_null() { +; CHECK-NEXT: store atomic i32 0, ptr null unordered, align 4 +; CHECK-NEXT: ret i32 0 +; + store atomic i32 0, ptr null unordered, align 4 + ret i32 0 +} + +define i32 @test5_null() { +; CHECK-LABEL: define i32 @test5_null() { +; CHECK-NEXT: store atomic i32 0, ptr null monotonic, align 4 +; CHECK-NEXT: ret i32 0 +; + store atomic i32 0, ptr null monotonic, align 4 + ret i32 0 +} + +define i32 @test6_null() { +; CHECK-LABEL: define i32 @test6_null() { +; CHECK-NEXT: store atomic i32 0, ptr null seq_cst, align 4 +; CHECK-NEXT: ret i32 0 +; + store atomic i32 0, ptr null seq_cst, align 4 + ret i32 0 +} diff --git a/llvm/test/Transforms/InferAlignment/basic.ll b/llvm/test/Transforms/InferAlignment/basic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InferAlignment/basic.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -passes=no-op-function -S < %s | FileCheck %s + +; ------------------------------------------------------------------------------ +; Undef and null +; ------------------------------------------------------------------------------ + +define i32 @load_from_undef(ptr %P) { +; CHECK-LABEL: define i32 @load_from_undef +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = load i32, ptr undef, align 4 +; CHECK-NEXT: ret i32 [[RET]] +; + %ret = load i32, ptr undef + ret i32 %ret +} + +define i32 @load_from_null(ptr %P) { +; CHECK-LABEL: define i32 @load_from_null +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = load i32, ptr null, align 4 +; CHECK-NEXT: ret i32 [[RET]] +; + %ret = load i32, ptr null + ret i32 %ret +} + +define void @store_into_undef(ptr %P) { +; CHECK-LABEL: define void 
@store_into_undef
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    store i32 123, ptr undef, align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 123, ptr undef
+  ret void
+}
+
+define void @store_into_null(ptr %P) {
+; CHECK-LABEL: define void @store_into_null
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    store i32 124, ptr null, align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 124, ptr null
+  ret void
+}
+
+
+; ------------------------------------------------------------------------------
+; Scalar types
+; ------------------------------------------------------------------------------
+
+; This test must not have the store of %x forwarded to the load -- there is an
+; intervening store of %y. However, the intervening store occurs with a different
+; type and size and to a different pointer value. This is ensuring that none of
+; those confuse the analysis into thinking that the second store does not alias
+; the first.
+
+define i8 @test1(i8 %x, i32 %y) {
+; CHECK-LABEL: define i8 @test1
+; CHECK-SAME: (i8 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[ALLOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i8 [[X]], ptr [[ALLOC]], align 1
+; CHECK-NEXT:    store i32 [[Y]], ptr [[ALLOC]], align 4
+; CHECK-NEXT:    [[RET:%.*]] = load i8, ptr [[ALLOC]], align 1
+; CHECK-NEXT:    ret i8 [[RET]]
+;
+  %alloc = alloca i32
+  store i8 %x, ptr %alloc
+  store i32 %y, ptr %alloc
+  %ret = load i8, ptr %alloc
+  ret i8 %ret
+}
+
+@test2_global = external global i32
+
+; Same test as @test1 essentially, but using a global instead of an alloca.
+define i8 @test2(i8 %x, i32 %y) {
+; CHECK-LABEL: define i8 @test2
+; CHECK-SAME: (i8 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    store i8 [[X]], ptr @test2_global, align 1
+; CHECK-NEXT:    store i32 [[Y]], ptr @test2_global, align 4
+; CHECK-NEXT:    [[RET:%.*]] = load i8, ptr @test2_global, align 1
+; CHECK-NEXT:    ret i8 [[RET]]
+;
+  store i8 %x, ptr @test2_global
+  store i32 %y, ptr @test2_global
+  %ret = load i8, ptr @test2_global
+  ret i8 %ret
+}
diff --git a/llvm/test/Transforms/InferAlignment/dbginfo-1.ll b/llvm/test/Transforms/InferAlignment/dbginfo-1.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAlignment/dbginfo-1.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes='no-op-function' -S | FileCheck %s
+
+define i32 @foo(<vscale x 4 x i32> %x) {
+; CHECK-LABEL: define i32 @foo
+; CHECK-SAME: (<vscale x 4 x i32> [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[ARR]], metadata [[META8:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]]
+; CHECK-NEXT:    store <vscale x 4 x i32> [[X]], ptr [[ARR]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = load i32, ptr [[ARR]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %arr = alloca i32, align 4
+  call void @llvm.dbg.declare(metadata ptr %arr, metadata !8, metadata !DIExpression()), !dbg !14
+  store <vscale x 4 x i32> %x, ptr %arr, align 4
+  %res = load i32, ptr %arr
+  ret i32 %res
+}
+
+define i32 @foo2(<vscale x 4 x i32> %x) {
+; CHECK-LABEL: define i32 @foo2
+; CHECK-SAME: (<vscale x 4 x i32> [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARR:%.*]] = alloca [4 x i32], align 4
+; CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[ARR]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17:![0-9]+]]
+; CHECK-NEXT:    store <vscale x 4 x i32> [[X]], ptr [[ARR]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = load i32, ptr [[ARR]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %arr = alloca [4 x i32], align 4
+  call void @llvm.dbg.declare(metadata ptr %arr, metadata !15, metadata !DIExpression()), !dbg !17
+  store <vscale x 4 x i32> %x, ptr %arr, align 4
+  %res = load i32, ptr %arr
+  ret i32 %res
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
+
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6, !7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 17.0.0 (git@github.com:llvm/llvm-project.git a489e11439e36c7e0ec83b28a6fb1596a5c21faa)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "u.cpp", directory: "/path/to/test", checksumkind: CSK_MD5, checksum: "42f62c17fb0f0110f515890bc6d69cb5")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 5}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{i32 1, !"target-abi", !"lp64d"}
+!7 = !{i32 8, !"SmallDataLimit", i32 8}
+!8 = !DILocalVariable(name: "arr", scope: !9, file: !1, line: 3, type: !12)
+!9 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !1, file: !1, line: 2, type: !10, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!10 = !DISubroutineType(types: !11)
+!11 = !{null}
+!12 = !DICompositeType(tag: DW_TAG_array_type, baseType: !13, size: 128, elements: !2)
+!13 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!14 = !DILocation(line: 3, column: 7, scope: !9)
+!15 = !DILocalVariable(name: "arr", scope: !16, file: !1, line: 6, type: !12)
+!16 = distinct !DISubprogram(name: "foo2", linkageName: "foo2", scope: !1, file: !1, line: 5, type: !10, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!17 = !DILocation(line: 6, column: 7, scope: !16)
diff --git a/llvm/test/Transforms/InferAlignment/dbginfo-2.ll b/llvm/test/Transforms/InferAlignment/dbginfo-2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAlignment/dbginfo-2.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=no-op-function -S < %s | FileCheck %s
+
+define void @combine_metadata_dominance1(ptr %p) {
+; CHECK-LABEL: define void @combine_metadata_dominance1
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[P]], align 8, !align !0
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[B:%.*]] = load ptr, ptr [[P]], align 8, !align !1
+; CHECK-NEXT:    store i32 0, ptr [[A]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[B]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = load ptr, ptr %p, !align !2
+  br label %bb1
+
+bb1:
+  %b = load ptr, ptr %p, !align !3
+  store i32 0, ptr %a
+  store i32 0, ptr %b
+  ret void
+}
+
+define void @combine_metadata_dominance2(ptr %p) {
+; CHECK-LABEL: define void @combine_metadata_dominance2
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[P]], align 8, !align !0, !noundef !2
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[B:%.*]] = load ptr, ptr [[P]], align 8, !align !1
+; CHECK-NEXT:    store i32 0, ptr [[A]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[B]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = load ptr, ptr %p, !align !2, !noundef !1
+  br label %bb1
+
+bb1:
+  %b = load ptr, ptr %p, !align !3
+  store i32 0, ptr %a
+  store i32 0, ptr %b
+  ret void
+}
+
+!0 = !{}
+!1 = !{}
+!2 = !{i64 8}
+!3 = !{i64 4}
diff --git a/llvm/test/Transforms/InferAlignment/gep.ll b/llvm/test/Transforms/InferAlignment/gep.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAlignment/gep.ll
@@ -0,0 +1,166 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=no-op-function -S < %s | FileCheck %s
+
+@x = external global <2 x i64>, align 16
+@xx = external global [13 x <2 x i64>], align 16
+
+; ------------------------------------------------------------------------------
+; load instructions
+; ------------------------------------------------------------------------------
+
+define <2 x i64> @hem(i32 %i) {
+; CHECK-LABEL: define <2 x i64> @hem
+; CHECK-SAME: (i32 [[I:%.*]]) {
+; CHECK-NEXT:    [[T:%.*]] = getelementptr <2 x i64>, ptr @x, i32 [[I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[T]], align 1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %t = getelementptr <2 x i64>, ptr @x, i32 %i
+  %tmp1 = load <2 x i64>, ptr %t, align 1
+  ret <2 x i64> %tmp1
+}
+
+define <2 x i64> @hem_2d(i32 %i, i32 %j) {
+; CHECK-LABEL: define <2 x i64> @hem_2d
+; CHECK-SAME: (i32 [[I:%.*]], i32 [[J:%.*]]) {
+; CHECK-NEXT:    [[T:%.*]] = getelementptr [13 x <2 x i64>], ptr @xx, i32 [[I]], i32 [[J]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[T]], align 1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %t = getelementptr [13 x <2 x i64>], ptr @xx, i32 %i, i32 %j
+  %tmp1 = load <2 x i64>, ptr %t, align 1
+  ret <2 x i64> %tmp1
+}
+
+; TODO: For non-byte-sized vectors, current implementation assumes there is
+; padding to the next byte boundary between elements.
+@foo = constant <2 x i4> <i4 1, i4 2>, align 8
+
+define i4 @test_vector_load_i4() {
+; CHECK-LABEL: define i4 @test_vector_load_i4() {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i8, ptr @foo, i64 0
+; CHECK-NEXT:    [[RES0:%.*]] = load i4, ptr [[PTR0]], align 1
+; CHECK-NEXT:    ret i4 [[RES0]]
+;
+  %ptr0 = getelementptr i8, ptr @foo, i64 0
+  %res0 = load i4, ptr %ptr0, align 1
+  ret i4 %res0
+}
+
+; Check that we improve the alignment information.
+; The base pointer is 16-byte aligned and we access the field at
+; an offset of 8 bytes.
+; Every element in the @CallerInfos array is 16-byte aligned so
+; any access from the following gep is 8-byte aligned.
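+; Worked arithmetic for that claim, assuming the usual 64-bit layout where
+; ptr is 8 bytes: %struct.CallerInfo is { ptr, i32 } padded to 16 bytes, so
+; field 1 of element %idx sits at @CallerInfos + 16*%idx + 8. The base is
+; 16-byte aligned and the offset is always 8 (mod 16), hence align 8.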
+%struct.CallerInfo = type { ptr, i32 } +@CallerInfos = global [128 x %struct.CallerInfo] zeroinitializer, align 16 + +define i32 @test_gep_in_struct(i64 %idx) { +; CHECK-LABEL: define i32 @test_gep_in_struct +; CHECK-SAME: (i64 [[IDX:%.*]]) { +; CHECK-NEXT: [[NS7:%.*]] = getelementptr inbounds [128 x %struct.CallerInfo], ptr @CallerInfos, i64 0, i64 [[IDX]], i32 1 +; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[NS7]], align 1 +; CHECK-NEXT: ret i32 [[RES]] +; + %NS7 = getelementptr inbounds [128 x %struct.CallerInfo], ptr @CallerInfos, i64 0, i64 %idx, i32 1 + %res = load i32, ptr %NS7, align 1 + ret i32 %res +} + +; ------------------------------------------------------------------------------ +; store instructions +; ------------------------------------------------------------------------------ + +define void @hem_store(i32 %i, <2 x i64> %y) { +; CHECK-LABEL: define void @hem_store +; CHECK-SAME: (i32 [[I:%.*]], <2 x i64> [[Y:%.*]]) { +; CHECK-NEXT: [[T:%.*]] = getelementptr <2 x i64>, ptr @x, i32 [[I]] +; CHECK-NEXT: store <2 x i64> [[Y]], ptr [[T]], align 1 +; CHECK-NEXT: ret void +; + %t = getelementptr <2 x i64>, ptr @x, i32 %i + store <2 x i64> %y, ptr %t, align 1 + ret void +} + +define void @hem_2d_store(i32 %i, i32 %j, <2 x i64> %y) { +; CHECK-LABEL: define void @hem_2d_store +; CHECK-SAME: (i32 [[I:%.*]], i32 [[J:%.*]], <2 x i64> [[Y:%.*]]) { +; CHECK-NEXT: [[T:%.*]] = getelementptr [13 x <2 x i64>], ptr @xx, i32 [[I]], i32 [[J]] +; CHECK-NEXT: store <2 x i64> [[Y]], ptr [[T]], align 1 +; CHECK-NEXT: ret void +; + %t = getelementptr [13 x <2 x i64>], ptr @xx, i32 %i, i32 %j + store <2 x i64> %y, ptr %t, align 1 + ret void +} + +@arst = addrspace(1) global [4 x ptr addrspace(2)] zeroinitializer + +define void @test_evaluate_gep_as_ptrs_array(ptr addrspace(2) %B) { +; CHECK-LABEL: define void @test_evaluate_gep_as_ptrs_array +; CHECK-SAME: (ptr addrspace(2) [[B:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = getelementptr [4 x ptr addrspace(2)], ptr addrspace(1) @arst, i16 0, i16 2 +; CHECK-NEXT: store ptr addrspace(2) [[B]], ptr addrspace(1) [[A]], align 8 +; CHECK-NEXT: ret void +; + %A = getelementptr [4 x ptr addrspace(2)], ptr addrspace(1) @arst, i16 0, i16 2 + store ptr addrspace(2) %B, ptr addrspace(1) %A + ret void +} + +; Constant folding should fix notionally out-of-bounds indices +; and add inbounds keywords. 
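+; For reference: each %struct.X below is two [3 x i32] arrays (24 bytes),
+; and the stores index its first [3 x i32] field with a flattened i64 index
+; in 4-byte steps. Only indices 0..2 stay inside that field, which is why
+; the autogenerated checks show inbounds only on the first few geps.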
+ +%struct.X = type { [3 x i32], [3 x i32] } + +@Y = internal global [3 x %struct.X] zeroinitializer + +define void @frob() { +; CHECK-LABEL: define void @frob() { +; CHECK-NEXT: store i32 1, ptr @Y, align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr inbounds ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 1), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr inbounds ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 2), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 3), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 4), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 5), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 6), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 7), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 8), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 9), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 10), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 11), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 12), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 13), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 14), align 8 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 15), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 16), align 8 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 17), align 4 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 18), align 8 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 36), align 8 +; CHECK-NEXT: store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 19), align 8 +; CHECK-NEXT: ret void +; + store i32 1, ptr @Y, align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 1), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 2), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 3), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 4), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 5), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 6), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 7), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 8), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 9), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 10), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 11), align 4 + store i32 1, ptr 
getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 12), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 13), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 14), align 8 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 15), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 16), align 8 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 17), align 4 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 18), align 8 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 36), align 8 + store i32 1, ptr getelementptr ([3 x %struct.X], ptr @Y, i64 0, i64 0, i32 0, i64 19), align 8 + ret void +} diff --git a/llvm/test/Transforms/InferAlignment/irregular-size.ll b/llvm/test/Transforms/InferAlignment/irregular-size.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InferAlignment/irregular-size.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -passes=no-op-function -S | FileCheck %s + +define i177 @ossfuzz_9880(i177 %X) { +; CHECK-LABEL: define i177 @ossfuzz_9880 +; CHECK-SAME: (i177 [[X:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = alloca i177, align 8 +; CHECK-NEXT: [[L1:%.*]] = load i177, ptr [[A]], align 4 +; CHECK-NEXT: ret i177 [[L1]] +; + %A = alloca i177 + %L1 = load i177, ptr %A + ret i177 %L1 +} diff --git a/llvm/test/Transforms/InferAlignment/load-assume.ll b/llvm/test/Transforms/InferAlignment/load-assume.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InferAlignment/load-assume.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -passes=no-op-function -S | FileCheck --check-prefixes=CHECK %s +target triple = "x86_64-unknown-linux-gnu" + +declare void @llvm.assume(i1) +declare void @escape(ptr) + +; Check that the alignment has been upgraded and that the assume has not +; been removed: + +define i32 @foo1(ptr %a) { +; CHECK-LABEL: define i32 @foo1 +; CHECK-SAME: (ptr [[A:%.*]]) { +; CHECK-NEXT: [[T0:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 +; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: ret i32 [[T0]] +; + %t0 = load i32, ptr %a, align 4 + %ptrint = ptrtoint ptr %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + ret i32 %t0 +} + +define i32 @foo2(ptr %a) { +; CHECK-LABEL: define i32 @foo2 +; CHECK-SAME: (ptr [[A:%.*]]) { +; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 +; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: [[T0:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: ret i32 [[T0]] +; + %ptrint = ptrtoint ptr %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + %t0 = load i32, ptr %a, align 4 + ret i32 %t0 +} diff --git a/llvm/test/Transforms/InferAlignment/load-attr.ll b/llvm/test/Transforms/InferAlignment/load-attr.ll new 
file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InferAlignment/load-attr.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -passes=no-op-function -S | FileCheck %s +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define i32 @foo1(ptr align 32 %a) #0 { +; CHECK-LABEL: define i32 @foo1 +; CHECK-SAME: (ptr align 32 [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + %0 = load i32, ptr %a, align 4 + ret i32 %0 + +} + +define i32 @foo2(ptr align 32 %a) #0 { +; CHECK-LABEL: define i32 @foo2 +; CHECK-SAME: (ptr align 32 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = call ptr @func1(ptr [[A]]) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[V]], align 4 +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + %v = call ptr @func1(ptr %a) + %0 = load i32, ptr %v, align 4 + ret i32 %0 + +} + +declare ptr @func1(ptr returned) nounwind + +attributes #0 = { nounwind uwtable } diff --git a/llvm/test/Transforms/InferAlignment/ptrmask.ll b/llvm/test/Transforms/InferAlignment/ptrmask.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InferAlignment/ptrmask.ll @@ -0,0 +1,127 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -passes=no-op-function -S | FileCheck %s + +declare ptr @llvm.ptrmask.p0.i64(ptr, i64) +declare ptr @llvm.ptrmask.p0.i32(ptr, i32) +declare ptr @llvm.ptrmask.p0.i128(ptr, i128) + +; TODO: alignment is not inferred here +define <16 x i8> @ptrmask_align_unknown_ptr_align1(ptr align 1 %ptr, i64 %mask) { +; CHECK-LABEL: define <16 x i8> @ptrmask_align_unknown_ptr_align1 +; CHECK-SAME: (ptr align 1 [[PTR:%.*]], i64 [[MASK:%.*]]) { +; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 [[MASK]]) +; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 +; CHECK-NEXT: ret <16 x i8> [[LOAD]] +; + %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 %mask) + %load = load <16 x i8>, ptr %aligned, align 1 + ret <16 x i8> %load +} + +define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask) { +; CHECK-LABEL: define <16 x i8> @ptrmask_align_unknown_ptr_align8 +; CHECK-SAME: (ptr align 8 [[PTR:%.*]], i64 [[MASK:%.*]]) { +; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 [[MASK]]) +; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 +; CHECK-NEXT: ret <16 x i8> [[LOAD]] +; + %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 %mask) + %load = load <16 x i8>, ptr %aligned, align 1 + ret <16 x i8> %load +} + +; Increase load align from 1 to 2 +define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) { +; CHECK-LABEL: define <16 x i8> @ptrmask_align2_ptr_align1 +; CHECK-SAME: (ptr align 1 [[PTR:%.*]]) { +; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -2) +; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 +; CHECK-NEXT: ret <16 x i8> [[LOAD]] +; + %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -2) + %load = load <16 x i8>, ptr %aligned, align 1 + ret <16 x i8> %load +} + +; Increase load align from 1 to 4 +define <16 x i8> @ptrmask_align4_ptr_align1(ptr align 1 %ptr) { +; CHECK-LABEL: define <16 x i8> @ptrmask_align4_ptr_align1 +; CHECK-SAME: (ptr align 1 [[PTR:%.*]]) { +; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr 
@llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -4) +; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 +; CHECK-NEXT: ret <16 x i8> [[LOAD]] +; + %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -4) + %load = load <16 x i8>, ptr %aligned, align 1 + ret <16 x i8> %load +} + +; Increase load align from 1 to 8 +define <16 x i8> @ptrmask_align8_ptr_align1(ptr align 1 %ptr) { +; CHECK-LABEL: define <16 x i8> @ptrmask_align8_ptr_align1 +; CHECK-SAME: (ptr align 1 [[PTR:%.*]]) { +; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -8) +; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 +; CHECK-NEXT: ret <16 x i8> [[LOAD]] +; + %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8) + %load = load <16 x i8>, ptr %aligned, align 1 + ret <16 x i8> %load +} + +; Underlying alignment already the same as forced alignment by ptrmask +; TODO: Should be able to drop the ptrmask +define <16 x i8> @ptrmask_align8_ptr_align8(ptr align 8 %ptr) { +; CHECK-LABEL: define <16 x i8> @ptrmask_align8_ptr_align8 +; CHECK-SAME: (ptr align 8 [[PTR:%.*]]) { +; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -8) +; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 +; CHECK-NEXT: ret <16 x i8> [[LOAD]] +; + %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8) + %load = load <16 x i8>, ptr %aligned, align 1 + ret <16 x i8> %load +} + +; Underlying alignment greater than alignment forced by ptrmask +; TODO: Should be able to drop the ptrmask +define <16 x i8> @ptrmask_align8_ptr_align16(ptr align 16 %ptr) { +; CHECK-LABEL: define <16 x i8> @ptrmask_align8_ptr_align16 +; CHECK-SAME: (ptr align 16 [[PTR:%.*]]) { +; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR]], i64 -8) +; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 +; CHECK-NEXT: ret <16 x i8> [[LOAD]] +; + %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8) + %load = load <16 x i8>, ptr %aligned, align 1 + ret <16 x i8> %load +} + +; Increase load align from 1 to 8, and the mask type is smaller +; than the pointer size. +define <16 x i8> @ptrmask_align8_ptr_align1_smallmask(ptr align 1 %ptr) { +; CHECK-LABEL: define <16 x i8> @ptrmask_align8_ptr_align1_smallmask +; CHECK-SAME: (ptr align 1 [[PTR:%.*]]) { +; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[PTR]], i32 -8) +; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1 +; CHECK-NEXT: ret <16 x i8> [[LOAD]] +; + %aligned = call ptr @llvm.ptrmask.p0.i32(ptr %ptr, i32 -8) + %load = load <16 x i8>, ptr %aligned, align 1 + ret <16 x i8> %load +} + +; Increase load align from 1 to 8, and the mask type is larger +; than the pointer size. 
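+; The alignment implied by a constant mask is 1 << countr_zero(mask): -8 is
+; ...111000 in two's complement, so the masked pointer has its low three
+; bits clear and is at least 8-byte aligned. Mask bits above the pointer
+; width are simply truncated away, so an i128 mask proves no more than an
+; i64 one (a sketch of the expected reasoning; these loads stay align 1
+; under the no-op RUN line).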
+define <16 x i8> @ptrmask_align8_ptr_align1_bigmask(ptr align 1 %ptr) {
+; CHECK-LABEL: define <16 x i8> @ptrmask_align8_ptr_align1_bigmask
+; CHECK-SAME: (ptr align 1 [[PTR:%.*]]) {
+; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i128(ptr [[PTR]], i128 -8)
+; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
+; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
+;
+  %aligned = call ptr @llvm.ptrmask.p0.i128(ptr %ptr, i128 -8)
+  %load = load <16 x i8>, ptr %aligned, align 1
+  ret <16 x i8> %load
+}
+
diff --git a/llvm/test/Transforms/InferAlignment/store-2d-gep.ll b/llvm/test/Transforms/InferAlignment/store-2d-gep.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAlignment/store-2d-gep.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes=no-op-function -S | FileCheck %s
+
+define void @foo2(ptr %b, i64 %n, i64 %u, i64 %y) nounwind {
+; CHECK-LABEL: define void @foo2
+; CHECK-SAME: (ptr [[B:%.*]], i64 [[N:%.*]], i64 [[U:%.*]], i64 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[D:%.*]] = and i64 [[C]], -16
+; CHECK-NEXT:    [[E:%.*]] = inttoptr i64 [[D]] to ptr
+; CHECK-NEXT:    [[V:%.*]] = mul i64 [[U]], 2
+; CHECK-NEXT:    [[Z:%.*]] = and i64 [[Y]], -2
+; CHECK-NEXT:    [[T1421:%.*]] = icmp eq i64 [[N]], 0
+; CHECK-NEXT:    br i1 [[T1421]], label [[RETURN:%.*]], label [[BB:%.*]]
+; CHECK:       bb:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[BB]] ], [ 20, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[J:%.*]] = mul i64 [[I]], [[V]]
+; CHECK-NEXT:    [[H:%.*]] = add i64 [[J]], [[Z]]
+; CHECK-NEXT:    [[T8:%.*]] = getelementptr double, ptr [[E]], i64 [[H]]
+; CHECK-NEXT:    store <2 x double> zeroinitializer, ptr [[T8]], align 8
+; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[I]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[RETURN]], label [[BB]]
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c = ptrtoint ptr %b to i64
+  %d = and i64 %c, -16
+  %e = inttoptr i64 %d to ptr
+  %v = mul i64 %u, 2
+  %z = and i64 %y, -2
+  %t1421 = icmp eq i64 %n, 0
+  br i1 %t1421, label %return, label %bb
+
+bb:
+  %i = phi i64 [ %indvar.next, %bb ], [ 20, %entry ]
+  %j = mul i64 %i, %v
+  %h = add i64 %j, %z
+  %t8 = getelementptr double, ptr %e, i64 %h
+  store <2 x double> zeroinitializer, ptr %t8, align 8
+  %indvar.next = add i64 %i, 1
+  %exitcond = icmp eq i64 %indvar.next, %n
+  br i1 %exitcond, label %return, label %bb
+
+return:
+  ret void
+}
+
+; A multi-dimensional array in a nested loop doing vector stores that
+; aren't yet aligned. InferAlignment can understand the addressing in the
+; Nice case to prove 16 byte alignment. In the Awkward case, the inner
+; array dimension is not even, so the stores to it won't always be
+; aligned. InferAlignment should prove alignment in exactly one of the two
+; stores.
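+; The underlying arithmetic: a @Nice row is 20000*8 bytes, a multiple of
+; 16, so with the 32-byte-aligned base &Nice[i][j] is 8*j (mod 16) and the
+; even-j stores land on 16-byte boundaries. An @Awkward row is 20001*8
+; bytes, i.e. 8 (mod 16), so the offset also depends on the parity of i
+; and nothing beyond 8-byte alignment is provable there.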
+
+@Nice = global [1001 x [20000 x double]] zeroinitializer, align 32
+@Awkward = global [1001 x [20001 x double]] zeroinitializer, align 32
+
+define void @foo() nounwind {
+; CHECK-LABEL: define void @foo
+; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB7_OUTER:%.*]]
+; CHECK:       bb7.outer:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT26:%.*]], [[BB11:%.*]] ]
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ 0, [[BB7_OUTER]] ], [ [[INDVAR_NEXT:%.*]], [[BB1]] ]
+; CHECK-NEXT:    [[T4:%.*]] = getelementptr [1001 x [20000 x double]], ptr @Nice, i64 0, i64 [[I]], i64 [[J]]
+; CHECK-NEXT:    store <2 x double> zeroinitializer, ptr [[T4]], align 8
+; CHECK-NEXT:    [[S4:%.*]] = getelementptr [1001 x [20001 x double]], ptr @Awkward, i64 0, i64 [[I]], i64 [[J]]
+; CHECK-NEXT:    store <2 x double> zeroinitializer, ptr [[S4]], align 8
+; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[J]], 2
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 556
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[BB11]], label [[BB1]]
+; CHECK:       bb11:
+; CHECK-NEXT:    [[INDVAR_NEXT26]] = add i64 [[I]], 1
+; CHECK-NEXT:    [[EXITCOND27:%.*]] = icmp eq i64 [[INDVAR_NEXT26]], 991
+; CHECK-NEXT:    br i1 [[EXITCOND27]], label [[RETURN_SPLIT:%.*]], label [[BB7_OUTER]]
+; CHECK:       return.split:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %bb7.outer
+
+bb7.outer:
+  %i = phi i64 [ 0, %entry ], [ %indvar.next26, %bb11 ]
+  br label %bb1
+
+bb1:
+  %j = phi i64 [ 0, %bb7.outer ], [ %indvar.next, %bb1 ]
+
+  %t4 = getelementptr [1001 x [20000 x double]], ptr @Nice, i64 0, i64 %i, i64 %j
+  store <2 x double> zeroinitializer, ptr %t4, align 8
+
+  %s4 = getelementptr [1001 x [20001 x double]], ptr @Awkward, i64 0, i64 %i, i64 %j
+  store <2 x double> zeroinitializer, ptr %s4, align 8
+
+  %indvar.next = add i64 %j, 2
+  %exitcond = icmp eq i64 %indvar.next, 556
+  br i1 %exitcond, label %bb11, label %bb1
+
+bb11:
+  %indvar.next26 = add i64 %i, 1
+  %exitcond27 = icmp eq i64 %indvar.next26, 991
+  br i1 %exitcond27, label %return.split, label %bb7.outer
+
+return.split:
+  ret void
+}
diff --git a/llvm/test/Transforms/InferAlignment/store-alloca.ll b/llvm/test/Transforms/InferAlignment/store-alloca.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAlignment/store-alloca.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes=no-op-function -S -data-layout="E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefixes=ALL,CHECK
+; RUN: opt < %s -passes=no-op-function -S -data-layout="E-p:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefixes=ALL,P32
+; RUN: opt < %s -passes=no-op-function -S | FileCheck %s -check-prefixes=NODL
+
+define void @test3() {
+; ALL-LABEL: define void @test3() {
+; ALL-NEXT:    [[A:%.*]] = alloca { i32 }, align 4
+; ALL-NEXT:    [[B:%.*]] = getelementptr { i32 }, ptr [[A]], i32 0, i32 0
+; ALL-NEXT:    store i32 123, ptr [[B]], align 4
+; ALL-NEXT:    ret void
+;
+; NODL-LABEL: define void @test3() {
+; NODL-NEXT:    [[A:%.*]] = alloca { i32 }, align 8
+; NODL-NEXT:    [[B:%.*]] = getelementptr { i32 }, ptr [[A]], i32 0, i32 0
+; NODL-NEXT:    store i32 123, ptr [[B]], align 4
+; NODL-NEXT:    ret void
+;
+  %A = alloca { i32 }                              ; [#uses=1]
+  %B = getelementptr { i32 }, ptr %A, i32 0, i32 0 ; [#uses=1]
+  store i32 123, ptr %B
+  ret void
+}
+
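+; test5/test6 below repeat the exercise across atomic orderings and an
+; addrspacecast. The datalayout is the interesting knob: under either
+; explicit layout the { i32 } alloca is 4-byte aligned, while the
+; layout-free RUN line (NODL prefixes) shows align 8 for it.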
+define void @test5() { +; CHECK-LABEL: define void @test5() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca { i32 }, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 123, ptr [[A]], align 4 +; CHECK-NEXT: store ptr [[A]], ptr [[B]], align 8 +; CHECK-NEXT: store i32 123, ptr [[B]], align 4 +; CHECK-NEXT: store atomic i32 2, ptr [[A]] unordered, align 4 +; CHECK-NEXT: store atomic i32 3, ptr [[A]] release, align 4 +; CHECK-NEXT: store atomic i32 4, ptr [[A]] seq_cst, align 4 +; CHECK-NEXT: [[C_1:%.*]] = addrspacecast ptr [[C]] to ptr addrspace(1) +; CHECK-NEXT: store i32 123, ptr addrspace(1) [[C_1]], align 4 +; CHECK-NEXT: ret void +; +; P32-LABEL: define void @test5() { +; P32-NEXT: entry: +; P32-NEXT: [[A:%.*]] = alloca { i32 }, align 4 +; P32-NEXT: [[B:%.*]] = alloca ptr, align 4 +; P32-NEXT: [[C:%.*]] = alloca i32, align 4 +; P32-NEXT: store i32 123, ptr [[A]], align 4 +; P32-NEXT: store ptr [[A]], ptr [[B]], align 4 +; P32-NEXT: store i32 123, ptr [[B]], align 4 +; P32-NEXT: store atomic i32 2, ptr [[A]] unordered, align 4 +; P32-NEXT: store atomic i32 3, ptr [[A]] release, align 4 +; P32-NEXT: store atomic i32 4, ptr [[A]] seq_cst, align 4 +; P32-NEXT: [[C_1:%.*]] = addrspacecast ptr [[C]] to ptr addrspace(1) +; P32-NEXT: store i32 123, ptr addrspace(1) [[C_1]], align 4 +; P32-NEXT: ret void +; +; NODL-LABEL: define void @test5() { +; NODL-NEXT: entry: +; NODL-NEXT: [[A:%.*]] = alloca { i32 }, align 8 +; NODL-NEXT: [[B:%.*]] = alloca ptr, align 8 +; NODL-NEXT: [[C:%.*]] = alloca i32, align 4 +; NODL-NEXT: store i32 123, ptr [[A]], align 4 +; NODL-NEXT: store ptr [[A]], ptr [[B]], align 8 +; NODL-NEXT: store i32 123, ptr [[B]], align 4 +; NODL-NEXT: store atomic i32 2, ptr [[A]] unordered, align 4 +; NODL-NEXT: store atomic i32 3, ptr [[A]] release, align 4 +; NODL-NEXT: store atomic i32 4, ptr [[A]] seq_cst, align 4 +; NODL-NEXT: [[C_1:%.*]] = addrspacecast ptr [[C]] to ptr addrspace(1) +; NODL-NEXT: store i32 123, ptr addrspace(1) [[C_1]], align 4 +; NODL-NEXT: ret void +; + +entry: + %a = alloca { i32 } + %b = alloca ptr + %c = alloca i32 + store i32 123, ptr %a + store ptr %a, ptr %b + store i32 123, ptr %b + store atomic i32 2, ptr %a unordered, align 4 + store atomic i32 3, ptr %a release, align 4 + store atomic i32 4, ptr %a seq_cst, align 4 + %c.1 = addrspacecast ptr %c to ptr addrspace(1) + store i32 123, ptr addrspace(1) %c.1 + ret void +} + +declare void @f(ptr %p) + +define void @test6() { +; ALL-LABEL: define void @test6() { +; ALL-NEXT: entry: +; ALL-NEXT: [[A:%.*]] = alloca { i32 }, align 4 +; ALL-NEXT: [[B:%.*]] = alloca i32, align 4 +; ALL-NEXT: store volatile i32 123, ptr [[A]], align 4 +; ALL-NEXT: tail call void @f(ptr [[B]]) +; ALL-NEXT: ret void +; +; NODL-LABEL: define void @test6() { +; NODL-NEXT: entry: +; NODL-NEXT: [[A:%.*]] = alloca { i32 }, align 8 +; NODL-NEXT: [[B:%.*]] = alloca i32, align 4 +; NODL-NEXT: store volatile i32 123, ptr [[A]], align 4 +; NODL-NEXT: tail call void @f(ptr [[B]]) +; NODL-NEXT: ret void +; + +entry: + %a = alloca { i32 } + %b = alloca i32 + store volatile i32 123, ptr %a + tail call void @f(ptr %b) + ret void +} diff --git a/llvm/test/Transforms/InferAlignment/vector.ll b/llvm/test/Transforms/InferAlignment/vector.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InferAlignment/vector.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s 
-passes=no-op-function -S | FileCheck %s
+
+; InferAlignment should be able to prove vector alignment in the
+; presence of a few mild address computation tricks.
+
+; ------------------------------------------------------------------------------
+; load instructions
+; ------------------------------------------------------------------------------
+
+@x = external global <2 x i64>, align 16
+
+define <2 x i64> @load_test1() {
+; CHECK-LABEL: define <2 x i64> @load_test1() {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @x, align 1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %tmp1 = load <2 x i64>, ptr @x, align 1
+  ret <2 x i64> %tmp1
+}
+
+define <2 x i64> @load_test2() {
+; CHECK-LABEL: define <2 x i64> @load_test2() {
+; CHECK-NEXT:    [[T:%.*]] = alloca <2 x i64>, align 16
+; CHECK-NEXT:    call void @use(ptr [[T]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[T]], align 1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %t = alloca <2 x i64>
+  call void @use(ptr %t)
+  %tmp1 = load <2 x i64>, ptr %t, align 1
+  ret <2 x i64> %tmp1
+}
+
+; When we see an unaligned load from an insufficiently aligned global or
+; alloca, increase the alignment of the load, turning it into an aligned load.
+
+@GLOBAL = internal global [4 x i32] zeroinitializer
+
+define <16 x i8> @load_test3(<2 x i64> %x) {
+; CHECK-LABEL: define <16 x i8> @load_test3
+; CHECK-SAME: (<2 x i64> [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = load <16 x i8>, ptr @GLOBAL, align 1
+; CHECK-NEXT:    ret <16 x i8> [[TMP]]
+;
+entry:
+  %tmp = load <16 x i8>, ptr @GLOBAL, align 1
+  ret <16 x i8> %tmp
+}
+
+@GLOBAL_as1 = internal addrspace(1) global [4 x i32] zeroinitializer
+
+define <16 x i8> @load_test3_as1(<2 x i64> %x) {
+; CHECK-LABEL: define <16 x i8> @load_test3_as1
+; CHECK-SAME: (<2 x i64> [[X:%.*]]) {
+; CHECK-NEXT:    [[TMP:%.*]] = load <16 x i8>, ptr addrspace(1) @GLOBAL_as1, align 1
+; CHECK-NEXT:    ret <16 x i8> [[TMP]]
+;
+  %tmp = load <16 x i8>, ptr addrspace(1) @GLOBAL_as1, align 1
+  ret <16 x i8> %tmp
+}
+
+@GLOBAL_as1_gep = internal addrspace(1) global [8 x i32] zeroinitializer
+
+define <16 x i8> @load_test3_as1_gep(<2 x i64> %x) {
+; CHECK-LABEL: define <16 x i8> @load_test3_as1_gep
+; CHECK-SAME: (<2 x i64> [[X:%.*]]) {
+; CHECK-NEXT:    [[TMP:%.*]] = load <16 x i8>, ptr addrspace(1) getelementptr inbounds ([8 x i32], ptr addrspace(1) @GLOBAL_as1_gep, i16 0, i16 4), align 1
+; CHECK-NEXT:    ret <16 x i8> [[TMP]]
+;
+  %tmp = load <16 x i8>, ptr addrspace(1) getelementptr ([8 x i32], ptr addrspace(1) @GLOBAL_as1_gep, i16 0, i16 4), align 1
+  ret <16 x i8> %tmp
+}
+
+; ------------------------------------------------------------------------------
+; store instructions
+; ------------------------------------------------------------------------------
+
+define void @store_test1(<2 x i64> %y) {
+; CHECK-LABEL: define void @store_test1
+; CHECK-SAME: (<2 x i64> [[Y:%.*]]) {
+; CHECK-NEXT:    store <2 x i64> [[Y]], ptr @x, align 1
+; CHECK-NEXT:    ret void
+;
+  store <2 x i64> %y, ptr @x, align 1
+  ret void
+}
+
+define void @store_test2(<2 x i64> %y) {
+; CHECK-LABEL: define void @store_test2
+; CHECK-SAME: (<2 x i64> [[Y:%.*]]) {
+; CHECK-NEXT:    [[T:%.*]] = alloca <2 x i64>, align 16
+; CHECK-NEXT:    call void @use(ptr [[T]])
+; CHECK-NEXT:    store <2 x i64> [[Y]], ptr [[T]], align 1
+; CHECK-NEXT:    ret void
+;
+  %t = alloca <2 x i64>
+  call void @use(ptr %t)
+  store <2 x i64> %y, ptr %t, align 1
+  ret void
+}
+
+declare void @use(ptr %t)
diff --git a/llvm/test/Transforms/InferAlignment/vscale.ll b/llvm/test/Transforms/InferAlignment/vscale.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAlignment/vscale.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=no-op-function -S < %s | FileCheck %s
+
+; These tests serve to verify code changes when underlying gep ptr is alloca.
+define i32 @gep_alloca_vscale_zero() {
+; CHECK-LABEL: define i32 @gep_alloca_vscale_zero() {
+; CHECK-NEXT:    [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[A]], i32 0, i32 2
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[TMP]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %a = alloca <vscale x 4 x i32>
+  %tmp = getelementptr <vscale x 4 x i32>, ptr %a, i32 0, i32 2
+  %load = load i32, ptr %tmp
+  ret i32 %load
+}
+
+define void @scalable4i32_to_fixed16i32(ptr %out) {
+; CHECK-LABEL: define void @scalable4i32_to_fixed16i32
+; CHECK-SAME: (ptr [[OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <vscale x 4 x i32>, align 16
+; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr [[TMP]], align 16
+; CHECK-NEXT:    [[RELOAD:%.*]] = load volatile <16 x i32>, ptr [[TMP]], align 16
+; CHECK-NEXT:    store <16 x i32> [[RELOAD]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = alloca <vscale x 4 x i32>, align 16
+  store <16 x i32> zeroinitializer, ptr %tmp, align 16
+  %reload = load volatile <16 x i32>, ptr %tmp, align 16
+  store <16 x i32> %reload, ptr %out, align 16
+  ret void
+}
+
+define void @scalable16i32_to_fixed16i32(ptr %out) {
+; CHECK-LABEL: define void @scalable16i32_to_fixed16i32
+; CHECK-SAME: (ptr [[OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <vscale x 16 x i32>, align 16
+; CHECK-NEXT:    store volatile <16 x i32> zeroinitializer, ptr [[TMP]], align 16
+; CHECK-NEXT:    [[RELOAD:%.*]] = load volatile <16 x i32>, ptr [[TMP]], align 16
+; CHECK-NEXT:    store <16 x i32> [[RELOAD]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = alloca <vscale x 16 x i32>, align 16
+  store volatile <16 x i32> zeroinitializer, ptr %tmp, align 16
+  %reload = load volatile <16 x i32>, ptr %tmp, align 16
+  store <16 x i32> %reload, ptr %out, align 16
+  ret void
+}
+
+define void @scalable32i32_to_scalable16i32(ptr %out) {
+; CHECK-LABEL: define void @scalable32i32_to_scalable16i32
+; CHECK-SAME: (ptr [[OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <vscale x 32 x i32>, align 16
+; CHECK-NEXT:    store volatile <vscale x 16 x i32> zeroinitializer, ptr [[TMP]], align 16
+; CHECK-NEXT:    [[RELOAD:%.*]] = load volatile <vscale x 16 x i32>, ptr [[TMP]], align 16
+; CHECK-NEXT:    store <vscale x 16 x i32> [[RELOAD]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = alloca <vscale x 32 x i32>, align 16
+  store volatile <vscale x 16 x i32> zeroinitializer, ptr %tmp, align 16
+  %reload = load volatile <vscale x 16 x i32>, ptr %tmp, align 16
+  store <vscale x 16 x i32> %reload, ptr %out, align 16
+  ret void
+}
+
+define void @scalable32i16_to_scalable16i32(ptr %out) {
+; CHECK-LABEL: define void @scalable32i16_to_scalable16i32
+; CHECK-SAME: (ptr [[OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <vscale x 32 x i16>, align 16
+; CHECK-NEXT:    store volatile <vscale x 16 x i32> zeroinitializer, ptr [[TMP]], align 16
+; CHECK-NEXT:    [[RELOAD:%.*]] = load volatile <vscale x 16 x i32>, ptr [[TMP]], align 16
+; CHECK-NEXT:    store <vscale x 16 x i32> [[RELOAD]], ptr [[OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = alloca <vscale x 32 x i16>, align 16
+  store volatile <vscale x 16 x i32> zeroinitializer, ptr %tmp, align 16
+  %reload = load volatile <vscale x 16 x i32>, ptr %tmp, align 16
+  store <vscale x 16 x i32> %reload, ptr %out, align 16
+  ret void
+}
+
+define void @scalable32i16_to_scalable16i32_multiuse(ptr %out, ptr %out2) {
+; CHECK-LABEL: define void @scalable32i16_to_scalable16i32_multiuse
+; CHECK-SAME: (ptr [[OUT:%.*]], ptr [[OUT2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca <vscale x 32 x i16>, align 16
+; CHECK-NEXT:    store volatile <vscale x 16 x i32> zeroinitializer, ptr [[TMP]], align 16
+; CHECK-NEXT:    [[RELOAD:%.*]] = load volatile <vscale x 16 x i32>, ptr [[TMP]], align 16
+; CHECK-NEXT:    store <vscale x 16 x i32> [[RELOAD]], ptr [[OUT]], align 16
+; CHECK-NEXT:    [[RELOAD2:%.*]] = load volatile <vscale x 16 x i32>, ptr [[TMP]], align 16
+; CHECK-NEXT:    store <vscale x 16 x i32> [[RELOAD2]], ptr [[OUT2]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp = alloca <vscale x 32 x i16>, align 16
+  store volatile <vscale x 16 x i32> zeroinitializer, ptr %tmp, align 16
+  %reload = load volatile <vscale x 16 x i32>, ptr %tmp, align 16
+  store <vscale x 16 x i32> %reload, ptr %out, align 16
+  %reload2 = load volatile <vscale x 16 x i32>, ptr %tmp, align 16
+  store <vscale x 16 x i32> %reload2, ptr %out2, align 16
+  ret void
+}
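+
+; Offset arithmetic for @gep_alloca_vscale_zero above: indexing element 2 of
+; a <vscale x 4 x i32> is a fixed byte offset of 2*4 = 8, independent of
+; vscale, so from the alloca's align 16 an align 8 load would be derivable
+; (a sketch; the no-op RUN line keeps the default align 4).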