diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12066,6 +12066,18 @@
 }
 
 static ScalableVectorType *getSVEContainerIRType(FixedVectorType *VTy) {
+  if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext()))
+    return ScalableVectorType::get(VTy->getElementType(), 2);
+
+  if (VTy->getElementType() == Type::getFloatTy(VTy->getContext()))
+    return ScalableVectorType::get(VTy->getElementType(), 4);
+
+  if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext()))
+    return ScalableVectorType::get(VTy->getElementType(), 8);
+
+  if (VTy->getElementType() == Type::getHalfTy(VTy->getContext()))
+    return ScalableVectorType::get(VTy->getElementType(), 8);
+
   if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext()))
     return ScalableVectorType::get(VTy->getElementType(), 2);
 
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
@@ -351,6 +351,167 @@
   ret void
 }
 
+define void @load_double_factor4(<16 x double>* %ptr) #0 {
+; CHECK-LABEL: @load_double_factor4(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x double>* [[PTR:%.*]] to double*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> [[TMP1]], double* [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 3
+; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.experimental.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP3]], i64 0)
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 2
+; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x double> @llvm.experimental.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP5]], i64 0)
+; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x double> @llvm.experimental.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP7]], i64 0)
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = call <4 x double> @llvm.experimental.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP9]], i64 0)
+; CHECK-NEXT:    ret void
+;
+  %interleaved.vec = load <16 x double>, <16 x double>* %ptr, align 4
+  %v0 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+  %v1 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+  %v2 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+  %v3 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+  ret void
+}
+
+define void @load_float_factor3(<24 x float>* %ptr) #0 {
+; CHECK-LABEL: @load_float_factor3(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <24 x float>* [[PTR:%.*]] to float*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> [[TMP1]], float* [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.experimental.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP3]], i64 0)
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.experimental.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP5]], i64 0)
+; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x float> @llvm.experimental.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP7]], i64 0)
+; CHECK-NEXT:    ret void
+;
+  %interleaved.vec = load <24 x float>, <24 x float>* %ptr, align 4
+  %v0 = shufflevector <24 x float> %interleaved.vec, <24 x float> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
+  %v1 = shufflevector <24 x float> %interleaved.vec, <24 x float> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
+  %v2 = shufflevector <24 x float> %interleaved.vec, <24 x float> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
+  ret void
+}
+
+define void @load_half_factor2(<32 x half>* %ptr) #0 {
+; CHECK-LABEL: @load_half_factor2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <32 x half>* [[PTR:%.*]] to half*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1> [[TMP1]], half* [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x half> @llvm.experimental.vector.extract.v16f16.nxv8f16(<vscale x 8 x half> [[TMP3]], i64 0)
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x half> @llvm.experimental.vector.extract.v16f16.nxv8f16(<vscale x 8 x half> [[TMP5]], i64 0)
+; CHECK-NEXT:    ret void
+;
+  %interleaved.vec = load <32 x half>, <32 x half>* %ptr, align 4
+  %v0 = shufflevector <32 x half> %interleaved.vec, <32 x half> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %v1 = shufflevector <32 x half> %interleaved.vec, <32 x half> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ret void
+}
+
+define void @load_bfloat_factor2(<32 x bfloat>* %ptr) #0 {
+; CHECK-LABEL: @load_bfloat_factor2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <32 x bfloat>* [[PTR:%.*]] to bfloat*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP1]], bfloat* [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x bfloat> @llvm.experimental.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], i64 0)
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x bfloat> @llvm.experimental.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[TMP5]], i64 0)
+; CHECK-NEXT:    ret void
+;
+  %interleaved.vec = load <32 x bfloat>, <32 x bfloat>* %ptr, align 4
+  %v0 = shufflevector <32 x bfloat> %interleaved.vec, <32 x bfloat> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %v1 = shufflevector <32 x bfloat> %interleaved.vec, <32 x bfloat> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ret void
+}
+
+define void @store_double_factor4(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) #0 {
+; CHECK-LABEL: @store_double_factor4(
+; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP2]], i64 0)
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP4]], i64 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP6]], i64 0)
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP9:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP8]], i64 0)
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <16 x double>* [[PTR:%.*]] to double*
+; CHECK-NEXT:    call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP7]], <vscale x 2 x double> [[TMP9]], <vscale x 2 x i1> [[TMP1]], double* [[TMP10]])
+; CHECK-NEXT:    ret void
+;
+  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 4
+  ret void
+}
+
+define void @store_float_factor3(<24 x float>* %ptr, <8 x float> %v0, <8 x float> %v1, <8 x float> %v2) #0 {
+; CHECK-LABEL: @store_float_factor3(
+; CHECK-NEXT:    [[S0:%.*]] = shufflevector <8 x float> [[V0:%.*]], <8 x float> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x float> [[V2:%.*]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[TMP2]], i64 0)
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[TMP4]], i64 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[TMP6]], i64 0)
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <24 x float>* [[PTR:%.*]] to float*
+; CHECK-NEXT:    call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP7]], <vscale x 4 x i1> [[TMP1]], float* [[TMP8]])
+; CHECK-NEXT:    ret void
+;
+  %s0 = shufflevector <8 x float> %v0, <8 x float> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %s1 = shufflevector <8 x float> %v2, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %interleaved.vec = shufflevector <16 x float> %s0, <16 x float> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
+  store <24 x float> %interleaved.vec, <24 x float>* %ptr, align 4
+  ret void
+}
+
+define void @store_half_factor2(<32 x half>* %ptr, <16 x half> %v0, <16 x half> %v1) #0 {
+; CHECK-LABEL: @store_half_factor2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x half> [[V0:%.*]], <16 x half> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v16f16(<vscale x 8 x half> undef, <16 x half> [[TMP2]], i64 0)
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x half> [[V0]], <16 x half> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v16f16(<vscale x 8 x half> undef, <16 x half> [[TMP4]], i64 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <32 x half>* [[PTR:%.*]] to half*
+; CHECK-NEXT:    call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x i1> [[TMP1]], half* [[TMP6]])
+; CHECK-NEXT:    ret void
+;
+  %interleaved.vec = shufflevector <16 x half> %v0, <16 x half> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  store <32 x half> %interleaved.vec, <32 x half>* %ptr, align 4
+  ret void
+}
+
+
+define void @store_bfloat_factor2(<32 x bfloat>* %ptr, <16 x bfloat> %v0, <16 x bfloat> %v1) #0 {
+; CHECK-LABEL: @store_bfloat_factor2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x bfloat> [[V0:%.*]], <16 x bfloat> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> undef, <16 x bfloat> [[TMP2]], i64 0)
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x bfloat> [[V0]], <16 x bfloat> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> undef, <16 x bfloat> [[TMP4]], i64 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <32 x bfloat>* [[PTR:%.*]] to bfloat*
+; CHECK-NEXT:    call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> [[TMP1]], bfloat* [[TMP6]])
+; CHECK-NEXT:    ret void
+;
+  %interleaved.vec = shufflevector <16 x bfloat> %v0, <16 x bfloat> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  store <32 x bfloat> %interleaved.vec, <32 x bfloat>* %ptr, align 4
+  ret void
+}
+
 attributes #0 = { vscale_range(2,2) "target-features"="+sve" }
 attributes #1 = { vscale_range(2,4) "target-features"="+sve" }
 attributes #2 = { vscale_range(4,4) "target-features"="+sve" }