diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1300,7 +1300,7 @@
                   llvm_anyvector_ty,
                   llvm_i64_ty
                 ],
-                [IntrReadMem, IntrArgMemOnly]>;
+                [IntrReadMem]>;
 
   class AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic
       : Intrinsic<[],
@@ -1329,7 +1329,7 @@
                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                   llvm_anyvector_ty, llvm_i64_ty
                 ],
-                [IntrWriteMem, IntrArgMemOnly]>;
+                [IntrWriteMem]>;
 
   class SVE_gather_prf_SV
diff --git a/llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll b/llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll
--- a/llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll
+++ b/llvm/test/Transforms/LICM/AArch64/sve-load-hoist.ll
@@ -26,5 +26,54 @@
   ret void
 }
 
+; The gather reads through a vector of pointers, not just %in_ptr/%out_ptr, so
+; without IntrArgMemOnly LICM must not hoist it out of the loop.
+define void @no_hoist_gather(<vscale x 2 x i32>* %out_ptr, <vscale x 2 x i32>* %in_ptr, <vscale x 2 x i64> %ptr_vec, i64 %n, <vscale x 2 x i1> %pred) {
+; CHECK-LABEL: @no_hoist_gather(
+; CHECK: entry:
+; CHECK-NOT: llvm.aarch64.sve.ld1.gather.scalar.offset
+; CHECK: for.body:
+; CHECK: llvm.aarch64.sve.ld1.gather.scalar.offset
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [0, %entry], [%inc, %for.body]
+  %gather = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pred, <vscale x 2 x i64> %ptr_vec, i64 0)
+  %in_ptr_gep = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %in_ptr, i64 %i
+  %in_ptr_load = load <vscale x 2 x i32>, <vscale x 2 x i32>* %in_ptr_gep, align 8
+  %sum = add <vscale x 2 x i32> %gather, %in_ptr_load
+  %out_ptr_gep = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %out_ptr, i64 %i
+  store <vscale x 2 x i32> %sum, <vscale x 2 x i32>* %out_ptr_gep, align 8
+  %inc = add nuw nsw i64 %i, 1
+  %cmp = icmp ult i64 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; The scatter may alias %in_ptr through %ptr_vec, so the plain load feeding it
+; must stay inside the loop.
+define void @no_hoist_scatter(<vscale x 2 x i32>* %out_ptr, <vscale x 2 x i32>* %in_ptr, <vscale x 2 x i64> %ptr_vec, i64 %n, <vscale x 2 x i1> %pred) {
+; CHECK-LABEL: @no_hoist_scatter(
+; CHECK: entry:
+; CHECK-NOT: load
+; CHECK: for.body:
+; CHECK: load
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [0, %entry], [%inc, %for.body]
+  %in_ptr_load = load <vscale x 2 x i32>, <vscale x 2 x i32>* %in_ptr, align 8
+  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32> %in_ptr_load, <vscale x 2 x i1> %pred, <vscale x 2 x i64> %ptr_vec, i64 %i)
+  %inc = add nuw nsw i64 %i, 1
+  %cmp = icmp ult i64 %inc, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
 declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
+declare void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i32>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)
+
+declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)