diff --git a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
--- a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -889,6 +889,10 @@
       Type *LoadTy = CI->getType();
       Align Alignment =
           DL->getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy);
+      // The scalarization pass currently doesn't produce loops, which
+      // would be required for scalarizing scalable vectors.
+      if (isa<ScalableVectorType>(LoadTy))
+        return false;
       if (TTI->isLegalMaskedGather(LoadTy, Alignment))
         return false;
       scalarizeMaskedGather(CI, ModifiedDT);
@@ -900,6 +904,10 @@
       Type *StoreTy = CI->getArgOperand(0)->getType();
       Align Alignment =
           DL->getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy);
+      // The scalarization pass currently doesn't produce loops, which
+      // would be required for scalarizing scalable vectors.
+      if (isa<ScalableVectorType>(StoreTy))
+        return false;
       if (TTI->isLegalMaskedScatter(StoreTy, Alignment))
         return false;
       scalarizeMaskedScatter(CI, ModifiedDT);
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1399,7 +1399,7 @@
     VectorType *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]);
     VectorType *ThisArgVecTy = dyn_cast<VectorType>(Ty);
     if (!ThisArgVecTy || !ReferenceType ||
-        (ReferenceType->getNumElements() != ThisArgVecTy->getNumElements()))
+        (ReferenceType->getElementCount() != ThisArgVecTy->getElementCount()))
       return true;
     PointerType *ThisArgEltTy =
         dyn_cast<PointerType>(ThisArgVecTy->getElementType());
diff --git a/llvm/test/CodeGen/AArch64/llvm-masked-gather-legal-for-sve.ll b/llvm/test/CodeGen/AArch64/llvm-masked-gather-legal-for-sve.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm-masked-gather-legal-for-sve.ll
@@ -0,0 +1,63 @@
+; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -scalarize-masked-mem-intrin -S < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Testing that masked gathers operating on scalable vectors that are
+; packed in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_gather_nxv4i32(
+; CHECK: call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32
+define <vscale x 4 x i32> @masked_gather_nxv4i32(<vscale x 4 x i32*> %ld, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthro) {
+  %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ld, i32 0, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthro)
+  ret <vscale x 4 x i32> %res
+}
+
+; Testing that masked gathers operating on scalable vectors of FP data
+; that is packed in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_gather_nxv2f64(
+; CHECK: call <vscale x 2 x double> @llvm.masked.gather.nxv2f64
+define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %ld, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthro) {
+  %res = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ld, i32 0, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthro)
+  ret <vscale x 2 x double> %res
+}
+
+; Testing that masked gathers operating on scalable vectors of FP data
+; that is unpacked in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_gather_nxv2f16(
+; CHECK: call <vscale x 2 x half> @llvm.masked.gather.nxv2f16
+define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %ld, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthro) {
+  %res = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ld, i32 0, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthro)
+  ret <vscale x 2 x half> %res
+}
+
+; Testing that masked gathers operating on 64-bit fixed vectors are
+; scalarized because NEON doesn't have support for masked gather
+; instructions.
+
+; CHECK-LABEL: @masked_gather_v2f32(
+; CHECK-NOT: @llvm.masked.gather.v2f32(
+define <2 x float> @masked_gather_v2f32(<2 x float*> %ld, <2 x i1> %masks, <2 x float> %passthro) {
+  %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
+  ret <2 x float> %res
+}
+
+; Testing that masked gathers operating on 128-bit fixed vectors are
+; scalarized because NEON doesn't have support for masked gather
+; instructions and because we are not targeting fixed width SVE.
+
+; CHECK-LABEL: @masked_gather_v4i32(
+; CHECK-NOT: @llvm.masked.gather.v4i32(
+define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ld, <4 x i1> %masks, <4 x i32> %passthro) {
+  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthro)
+  ret <4 x i32> %res
+}
+
+declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ptrs, i32 %align, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthro)
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 %align, <vscale x 2 x i1> %masks, <vscale x 2 x double> %passthro)
+declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 %align, <vscale x 2 x i1> %masks, <vscale x 2 x half> %passthro)
+declare <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthro)
+declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthro)
diff --git a/llvm/test/CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll b/llvm/test/CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm-masked-scatter-legal-for-sve.ll
@@ -0,0 +1,63 @@
+; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -scalarize-masked-mem-intrin -S < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Testing that masked scatters operating on scalable vectors that are
+; packed in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_scatter_nxv4i32(
+; CHECK: call void @llvm.masked.scatter.nxv4i32
+define void @masked_scatter_nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %masks) {
+  call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
+  ret void
+}
+
+; Testing that masked scatters operating on scalable vectors of FP
+; data that is packed in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_scatter_nxv2f64(
+; CHECK: call void @llvm.masked.scatter.nxv2f64
+define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %masks) {
+  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+  ret void
+}
+
+; Testing that masked scatters operating on scalable vectors of FP
+; data that is unpacked in SVE registers are not scalarized.
+
+; CHECK-LABEL: @masked_scatter_nxv2f16(
+; CHECK: call void @llvm.masked.scatter.nxv2f16
+define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %masks) {
+  call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+  ret void
+}
+
+; Testing that masked scatters operating on 64-bit fixed vectors are
+; scalarized because NEON doesn't have support for masked scatter
+; instructions.
+
+; CHECK-LABEL: @masked_scatter_v2f32(
+; CHECK-NOT: @llvm.masked.scatter.v2f32(
+define void @masked_scatter_v2f32(<2 x float> %data, <2 x float*> %ptrs, <2 x i1> %masks) {
+  call void @llvm.masked.scatter.v2f32(<2 x float> %data, <2 x float*> %ptrs, i32 0, <2 x i1> %masks)
+  ret void
+}
+
+; Testing that masked scatters operating on 128-bit fixed vectors are
+; scalarized because NEON doesn't have support for masked scatter
+; instructions and because we are not targeting fixed width SVE.
+
+; CHECK-LABEL: @masked_scatter_v4i32(
+; CHECK-NOT: @llvm.masked.scatter.v4i32(
+define void @masked_scatter_v4i32(<4 x i32> %data, <4 x i32*> %ptrs, <4 x i1> %masks) {
+  call void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 0, <4 x i1> %masks)
+  ret void
+}
+
+declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 %align, <vscale x 4 x i1> %masks)
+declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 %align, <vscale x 2 x i1> %masks)
+declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 %align, <vscale x 2 x i1> %masks)
+declare void @llvm.masked.scatter.v2f32(<2 x float> %data, <2 x float*> %ptrs, i32 %align, <2 x i1> %masks)
+declare void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 %align, <4 x i1> %masks)
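
For context (not part of the patch): a minimal, illustrative C++ sketch of the two type-level distinctions the change relies on. isa<ScalableVectorType> identifies the vector types the scalarization pass now refuses to touch, and ElementCount comparison (unlike a raw element-number comparison) keeps a fixed <4 x i32> from being treated as equal to a scalable <vscale x 4 x i32>. It assumes a recent LLVM source tree and is compiled against the LLVM headers; the file name element_count_sketch.cpp is arbitrary.

// element_count_sketch.cpp -- illustrative only, not part of the patch.
// Assumes compilation and linking against LLVM (e.g. flags from llvm-config).
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Type *I32 = Type::getInt32Ty(Ctx);

  // <4 x i32>: a fixed-width vector, as NEON would use.
  VectorType *Fixed = FixedVectorType::get(I32, 4);
  // <vscale x 4 x i32>: a scalable vector, as SVE would use.
  VectorType *Scalable = ScalableVectorType::get(I32, 4);

  // The guard added in ScalarizeMaskedMemIntrin.cpp: scalable types are
  // skipped because scalarizing them would require emitting a loop.
  outs() << "fixed is scalable:    "
         << (isa<ScalableVectorType>(Fixed) ? "yes" : "no") << "\n";
  outs() << "scalable is scalable: "
         << (isa<ScalableVectorType>(Scalable) ? "yes" : "no") << "\n";

  // The Function.cpp change: ElementCount carries the scalable flag, so the
  // two types compare as different even though both have a minimum of four
  // elements.
  outs() << "element counts equal: "
         << (Fixed->getElementCount() == Scalable->getElementCount() ? "yes"
                                                                     : "no")
         << "\n";
  return 0;
}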