diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -669,7 +669,7 @@ : Intrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty], [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, - NoCapture>, WriteOnly>, + NoCapture>, WriteOnly>, ImmArg>, ImmArg>]>; // FIXME: Add version of these floating point intrinsics which allow non-default @@ -1799,14 +1799,15 @@ DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem, IntrArgMemOnly, IntrWillReturn, ImmArg>]>; + [IntrReadMem, IntrArgMemOnly, IntrWillReturn, ImmArg>, + NoCapture>]>; def int_masked_store: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMAnyPointerType>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, - ImmArg>]>; + ImmArg>, NoCapture>]>; def int_masked_gather: DefaultAttrsIntrinsic<[llvm_anyvector_ty], @@ -1824,13 +1825,14 @@ DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMPointerToElt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem, IntrWillReturn]>; + [IntrReadMem, IntrWillReturn, NoCapture>]>; def int_masked_compressstore: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMPointerToElt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrWriteMem, IntrArgMemOnly, IntrWillReturn]>; + [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, + NoCapture>]>; // Test whether a pointer is associated with a type metadata identifier. 
def int_type_test : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty], diff --git a/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll b/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll @@ -0,0 +1,20 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +; Make sure some masked load/store intrinsics have the expected attributes +; Specifically `nocapture` should be added to the pointer parameters for the loads/stores + +; CHECK: declare @llvm.masked.load.nxv2i64.p0(ptr nocapture, i32 immarg, , ) [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN:#[0-9]+]] +declare @llvm.masked.load.nxv2i64.p0(ptr, i32, , ) + +; CHECK: declare void @llvm.masked.store.nxv2i64.p0(, ptr nocapture, i32 immarg, ) [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_WILLRETURN_WRITEONLY:#[0-9]+]] +declare void @llvm.masked.store.nxv2i64.p0(, ptr, i32, ) + +; CHECK: declare <16 x float> @llvm.masked.expandload.v16f32(ptr nocapture, <16 x i1>, <16 x float>) [[NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN:#[0-9]+]] +declare <16 x float> @llvm.masked.expandload.v16f32 (ptr, <16 x i1>, <16 x float>) + +; CHECK: declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr nocapture, <8 x i1>) [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_WILLRETURN_WRITEONLY:#[0-9]+]] +declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>) + +; CHECK: attributes [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN]] = { argmemonly nocallback nofree nosync nounwind readonly willreturn } +; CHECK: attributes [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_WILLRETURN_WRITEONLY]] = { argmemonly nocallback nofree nosync nounwind willreturn writeonly } +; CHECK: attributes [[NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN]] = { nocallback nofree nosync nounwind readonly willreturn } diff --git 
a/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll --- a/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll +++ b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll @@ -1,16 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -opaque-pointers -passes=instcombine < %s | FileCheck %s +; RUN: opt -S -passes=instcombine < %s | FileCheck %s @contant_int_array = private unnamed_addr constant [10 x i64] [i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9] ; InstCombine should be able to optimize out the alloca and memcpy: define void @combine_masked_load_store_from_constant_array(ptr %ptr) { ; CHECK-LABEL: @combine_masked_load_store_from_constant_array( -; CHECK-NEXT: [[TMP1:%.*]] = alloca [10 x i64], align 8 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(80) [[TMP1]], ptr noundef nonnull align 16 dereferenceable(80) @contant_int_array, i64 80, i1 false) -; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 0, i32 10) -; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr nonnull [[TMP1]], i32 8, [[TMP2]], zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP3]], ptr [[PTR:%.*]], i32 1, [[TMP2]]) +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 0, i32 10) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr nonnull @contant_int_array, i32 8, [[TMP1]], zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP2]], ptr [[PTR:%.*]], i32 1, [[TMP1]]) ; CHECK-NEXT: ret void ; %1 = alloca [10 x i64] @@ -21,7 +19,22 @@ ret void } +define void @combine_masked_expandload_compressstore_from_constant_array(ptr %ptr) { +; CHECK-LABEL: @combine_masked_expandload_compressstore_from_constant_array( +; CHECK-NEXT: [[TMP1:%.*]] 
= call <10 x i64> @llvm.masked.expandload.v10i64(ptr nonnull @contant_int_array, <10 x i1> , <10 x i64> zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.compressstore.v10i64(<10 x i64> [[TMP1]], ptr [[PTR:%.*]], <10 x i1> ) +; CHECK-NEXT: ret void +; + %1 = alloca [10 x i64] + call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr @contant_int_array, i64 80, i1 false) + %2 = call <10 x i64> @llvm.masked.expandload.v10i64(ptr nonnull %1, <10 x i1> , <10 x i64> zeroinitializer) + call void @llvm.masked.compressstore.v10i64(<10 x i64> %2, ptr %ptr, <10 x i1> ) + ret void +} + declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) declare @llvm.masked.load.nxv2i64.p0(ptr, i32, , ) declare void @llvm.masked.store.nxv2i64.p0(, ptr, i32, ) declare @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32, i32) +declare <10 x i64> @llvm.masked.expandload.v10i64(ptr, <10 x i1>, <10 x i64>) +declare void @llvm.masked.compressstore.v10i64(<10 x i64>, ptr, <10 x i1>) diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir --- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir @@ -752,12 +752,12 @@ // CHECK-DAG: declare <48 x float> @llvm.matrix.column.major.load.v48f32.i64(ptr nocapture, i64, i1 immarg, i32 immarg, i32 immarg) // CHECK-DAG: declare void @llvm.matrix.column.major.store.v48f32.i64(<48 x float>, ptr nocapture writeonly, i64, i1 immarg, i32 immarg, i32 immarg) // CHECK-DAG: declare <7 x i1> @llvm.get.active.lane.mask.v7i1.i64(i64, i64) -// CHECK-DAG: declare <7 x float> @llvm.masked.load.v7f32.p0(ptr, i32 immarg, <7 x i1>, <7 x float>) -// CHECK-DAG: declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr, i32 immarg, <7 x i1>) +// CHECK-DAG: declare <7 x float> @llvm.masked.load.v7f32.p0(ptr nocapture, i32 immarg, <7 x i1>, <7 x float>) +// CHECK-DAG: declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr nocapture, i32 immarg, <7 x i1>) // CHECK-DAG: declare <7 x 
float> @llvm.masked.gather.v7f32.v7p0(<7 x ptr>, i32 immarg, <7 x i1>, <7 x float>) // CHECK-DAG: declare void @llvm.masked.scatter.v7f32.v7p0(<7 x float>, <7 x ptr>, i32 immarg, <7 x i1>) -// CHECK-DAG: declare <7 x float> @llvm.masked.expandload.v7f32(ptr, <7 x i1>, <7 x float>) -// CHECK-DAG: declare void @llvm.masked.compressstore.v7f32(<7 x float>, ptr, <7 x i1>) +// CHECK-DAG: declare <7 x float> @llvm.masked.expandload.v7f32(ptr nocapture, <7 x i1>, <7 x float>) +// CHECK-DAG: declare void @llvm.masked.compressstore.v7f32(<7 x float>, ptr nocapture, <7 x i1>) // CHECK-DAG: declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) // CHECK-DAG: declare void @llvm.memcpy.inline.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64 immarg, i1 immarg) // CHECK-DAG: declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)