Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1235,6 +1235,8 @@
   setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
   setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
 
+  setLoadExtAction(ISD::SEXTLOAD, MVT::nxv4i64, MVT::nxv4i32, Expand);
+
   for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                   MVT::nxv4f32, MVT::nxv2f64}) {
     setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -518,6 +518,16 @@
   ret %res
 }
 
+define <vscale x 4 x i64> @masked_ld1w_i32(<vscale x 4 x i32>* %base, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: masked_ld1w_i32:
+; CHECK: ld1w { z1.s }, p0/z, [x0]
+; CHECK: sunpklo z0.d, z1.s
+; CHECK: sunpkhi z1.d, z1.s
+; CHECK-NEXT: ret
+  %wide.masked.load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %base, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
+  %res = sext <vscale x 4 x i32> %wide.masked.load to <vscale x 4 x i64>
+  ret <vscale x 4 x i64> %res
+}
 
 declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1>, i8*)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1rq.nxv8i16(<vscale x 8 x i1>, i16*)
@@ -563,3 +573,6 @@
 declare <vscale x 32 x bfloat> @llvm.aarch64.sve.ld4.nxv32bf16.nxv8i1.p0bf16(<vscale x 8 x i1>, bfloat*)
 declare <vscale x 16 x float> @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32(<vscale x 4 x i1>, float*)
 declare <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64(<vscale x 2 x i1>, double*)
+
+declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>*, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i32>)
+