Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12056,6 +12056,18 @@
 }
 
 static ScalableVectorType *getSVEContainerIRType(FixedVectorType *VTy) {
+  if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext()))
+    return ScalableVectorType::get(VTy->getElementType(), 2);
+
+  if (VTy->getElementType() == Type::getFloatTy(VTy->getContext()))
+    return ScalableVectorType::get(VTy->getElementType(), 4);
+
+  if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext()))
+    return ScalableVectorType::get(VTy->getElementType(), 8);
+
+  if (VTy->getElementType() == Type::getHalfTy(VTy->getContext()))
+    return ScalableVectorType::get(VTy->getElementType(), 8);
+
   if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext()))
     return ScalableVectorType::get(VTy->getElementType(), 2);
 
Index: llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
===================================================================
--- llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
+++ llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
@@ -351,6 +351,74 @@
   ret void
 }
 
+define void @load_double_factor2(<8 x double>* %ptr) #0 {
+; CHECK-LABEL: @load_double_factor2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x double>* [[PTR:%.*]] to double*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> [[TMP1]], double* [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.experimental.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP3]], i64 0)
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x double> @llvm.experimental.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP5]], i64 0)
+; CHECK-NEXT:    ret void
+;
+  %interleaved.vec = load <8 x double>, <8 x double>* %ptr, align 4
+  %v0 = shufflevector <8 x double> %interleaved.vec, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %v1 = shufflevector <8 x double> %interleaved.vec, <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  ret void
+}
+
+define void @load_float_factor2(<16 x float>* %ptr) #0 {
+; CHECK-LABEL: @load_float_factor2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x float>* [[PTR:%.*]] to float*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> [[TMP1]], float* [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.experimental.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP3]], i64 0)
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.experimental.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP5]], i64 0)
+; CHECK-NEXT:    ret void
+;
+  %interleaved.vec = load <16 x float>, <16 x float>* %ptr, align 4
+  %v0 = shufflevector <16 x float> %interleaved.vec, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %v1 = shufflevector <16 x float> %interleaved.vec, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret void
+}
+
+define void @load_half_factor2(<32 x half>* %ptr) #0 {
+; CHECK-LABEL: @load_half_factor2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <32 x half>* [[PTR:%.*]] to half*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1> [[TMP1]], half* [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x half> @llvm.experimental.vector.extract.v16f16.nxv8f16(<vscale x 8 x half> [[TMP3]], i64 0)
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x half> @llvm.experimental.vector.extract.v16f16.nxv8f16(<vscale x 8 x half> [[TMP5]], i64 0)
+; CHECK-NEXT:    ret void
+;
+  %interleaved.vec = load <32 x half>, <32 x half>* %ptr, align 4
+  %v0 = shufflevector <32 x half> %interleaved.vec, <32 x half> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %v1 = shufflevector <32 x half> %interleaved.vec, <32 x half> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ret void
+}
+
+define void @load_bfloat_factor2(<32 x bfloat>* %ptr) #0 {
+; CHECK-LABEL: @load_bfloat_factor2(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <32 x bfloat>* [[PTR:%.*]] to bfloat*
+; CHECK-NEXT:    [[LDN:%.*]] = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP1]], bfloat* [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x bfloat> @llvm.experimental.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], i64 0)
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x bfloat> @llvm.experimental.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[TMP5]], i64 0)
+; CHECK-NEXT:    ret void
+;
+  %interleaved.vec = load <32 x bfloat>, <32 x bfloat>* %ptr, align 4
+  %v0 = shufflevector <32 x bfloat> %interleaved.vec, <32 x bfloat> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %v1 = shufflevector <32 x bfloat> %interleaved.vec, <32 x bfloat> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  ret void
+}
+
 attributes #0 = { vscale_range(2,2) "target-features"="+sve" }
 attributes #1 = { vscale_range(2,4) "target-features"="+sve" }
 attributes #2 = { vscale_range(4,4) "target-features"="+sve" }
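Note (not part of the patch): the element counts chosen above mirror the existing integer cases and follow from SVE's 128-bit minimum vector granule, i.e. 128 divided by the element's bit width, which is why f64 maps to nxv2f64, f32 to nxv4f32, and f16/bf16 to nxv8(b)f16. A minimal standalone C++ sketch of that arithmetic, using a hypothetical helper name:

  // Hypothetical illustration only: minimum element count of the SVE
  // container for a given element bit width, assuming the 128-bit granule.
  constexpr unsigned sveContainerMinElts(unsigned EltBits) {
    return 128 / EltBits;
  }

  static_assert(sveContainerMinElts(64) == 2, "double    -> nxv2f64");
  static_assert(sveContainerMinElts(32) == 4, "float     -> nxv4f32");
  static_assert(sveContainerMinElts(16) == 8, "half/bf16 -> nxv8(b)f16");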