Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1235,6 +1235,8 @@
   setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
   setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
 
+  setLoadExtAction(ISD::SEXTLOAD, MVT::nxv4i64, MVT::nxv4i32, Expand);
+
   for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                   MVT::nxv4f32, MVT::nxv2f64}) {
     setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -518,6 +518,16 @@
   ret %res
 }
 
+define <vscale x 4 x i64> @masked_ld1w_i32(<vscale x 4 x i32>* %base, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_ld1w_i32:
+; CHECK: ld1w { z1.s }, p0/z, [x0]
+; CHECK: sunpklo z0.d, z1.s
+; CHECK: sunpkhi z1.d, z1.s
+; CHECK-NEXT: ret
+  %wide.masked.load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %base, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
+  %res = sext <vscale x 4 x i32> %wide.masked.load to <vscale x 4 x i64>
+  ret <vscale x 4 x i64> %res
+}
 
 declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1>, i8*)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1rq.nxv8i16(<vscale x 8 x i1>, i16*)
@@ -563,3 +573,6 @@
 declare <vscale x 32 x bfloat> @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1.p0bf16(<vscale x 8 x i1>, bfloat*)
 declare <vscale x 16 x float> @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32(<vscale x 4 x i1>, float*)
 declare <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64(<vscale x 2 x i1>, double*)
+
+declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>*, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i32>)
+