diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -618,19 +618,27 @@
   ElementCount PartNumElts = PartVT.getVectorElementCount();
   ElementCount ValueNumElts = ValueVT.getVectorElementCount();

-  // We only support widening vectors with equivalent element types and
-  // fixed/scalable properties. If a target needs to widen a fixed-length type
-  // to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
+  // We only support widening vectors with equivalent fixed/scalable properties.
+  // If a target needs to widen a fixed-length type to a scalable one, it should
+  // be possible to use INSERT_SUBVECTOR below.
   if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
-      PartNumElts.isScalable() != ValueNumElts.isScalable() ||
-      PartVT.getVectorElementType() != ValueVT.getVectorElementType())
+      PartNumElts.isScalable() != ValueNumElts.isScalable())
     return SDValue();

   // Widening a scalable vector to another scalable vector is done by inserting
   // the vector into a larger undef one.
-  if (PartNumElts.isScalable())
+  if (PartNumElts.isScalable()) {
+    EVT PromotedVT = EVT::getVectorVT(*DAG.getContext(), PartVT.getScalarType(),
+                                      ValueVT.getVectorElementCount());
+    if (PartVT.getVectorElementType() != ValueVT.getVectorElementType() &&
+        PartVT.getVectorElementType().isInteger())
+      Val = DAG.getAnyExtOrTrunc(Val, DL, PromotedVT);
     return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getVectorIdxConstant(0, DL));
+  }
+
+  if (PartVT.getVectorElementType() != ValueVT.getVectorElementType())
+    return SDValue();

   EVT ElementVT = PartVT.getVectorElementType();
   // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
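Note (illustration, not part of the patch): the sketch below is a minimal standalone model of the scalable-vector widening rule added above, written in plain C++ rather than against the LLVM APIs, and using a hypothetical ValueVT/PartVT pair (<vscale x 6 x i8> widened into a <vscale x 8 x i16> part) purely for illustration. It mirrors the three steps the new code performs: build a promoted type with the part's scalar type and the value's element count, any-extend the value if the integer element types differ, then insert it into an undef vector of the part type at index 0.

```cpp
// Standalone model (not the LLVM API) of the widening path above.
// The VT struct and the example types are hypothetical illustrations.
#include <cstdio>

struct VT {
  unsigned MinNumElts; // scalable element count, i.e. a multiple of vscale
  unsigned EltBits;    // integer element width in bits
};

int main() {
  VT ValueVT = {6, 8};  // e.g. <vscale x 6 x i8>  (hypothetical input value)
  VT PartVT = {8, 16};  // e.g. <vscale x 8 x i16> (hypothetical register part)

  // Step 1: PromotedVT keeps the value's element count but takes the part's
  // scalar type, mirroring EVT::getVectorVT(Ctx, PartVT.getScalarType(),
  // ValueVT.getVectorElementCount()) in the patch.
  VT PromotedVT = {ValueVT.MinNumElts, PartVT.EltBits};

  // Step 2: if the integer element types differ, the value is any-extended
  // (or truncated) element-wise to PromotedVT.
  bool NeedsAnyExt = ValueVT.EltBits != PartVT.EltBits;

  // Step 3: the (possibly extended) value is inserted into an undef vector of
  // PartVT at index 0 (ISD::INSERT_SUBVECTOR), producing the widened result.
  std::printf("value   : <vscale x %u x i%u>\n", ValueVT.MinNumElts, ValueVT.EltBits);
  std::printf("promoted: <vscale x %u x i%u>%s\n", PromotedVT.MinNumElts,
              PromotedVT.EltBits, NeedsAnyExt ? " (via any_extend)" : "");
  std::printf("widened : <vscale x %u x i%u> (undef with subvector at index 0)\n",
              PartVT.MinNumElts, PartVT.EltBits);
  return 0;
}
```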
diff --git a/llvm/test/CodeGen/AArch64/sve-split-load.ll b/llvm/test/CodeGen/AArch64/sve-split-load.ll
--- a/llvm/test/CodeGen/AArch64/sve-split-load.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-load.ll
@@ -36,7 +36,7 @@
   ret <vscale x 24 x i16> %load
 }

-define <vscale x 8 x i16> @load_widen_6i16(<vscale x 6 x i16>* %a) {
+define <vscale x 6 x i16> @load_widen_6i16(<vscale x 6 x i16>* %a) {
 ; CHECK-LABEL: load_widen_6i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
@@ -47,11 +47,10 @@
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %load = load <vscale x 6 x i16>, <vscale x 6 x i16>* %a, align 1
-  %r = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.nxv6i16(<vscale x 8 x i16> undef, <vscale x 6 x i16> %load, i64 0)
-  ret <vscale x 8 x i16> %r
+  ret <vscale x 6 x i16> %load
 }

-define <vscale x 4 x i32> @load_widen_1i32(<vscale x 1 x i32>* %a) {
+define <vscale x 1 x i32> @load_widen_1i32(<vscale x 1 x i32>* %a) {
 ; CHECK-LABEL: load_widen_1i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntw x8
@@ -65,11 +64,10 @@
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    ret
   %load = load <vscale x 1 x i32>, <vscale x 1 x i32>* %a, align 1
-  %r = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv1i32(<vscale x 4 x i32> undef, <vscale x 1 x i32> %load, i64 0)
-  ret <vscale x 4 x i32> %r
+  ret <vscale x 1 x i32> %load
 }

-define <vscale x 4 x i32> @load_widen_3i32(<vscale x 3 x i32>* %a) {
+define <vscale x 3 x i32> @load_widen_3i32(<vscale x 3 x i32>* %a) {
 ; CHECK-LABEL: load_widen_3i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntw x8
@@ -80,11 +78,10 @@
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    ret
   %load = load <vscale x 3 x i32>, <vscale x 3 x i32>* %a, align 1
-  %r = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv3i32(<vscale x 4 x i32> undef, <vscale x 3 x i32> %load, i64 0)
-  ret <vscale x 4 x i32> %r
+  ret <vscale x 3 x i32> %load
 }

-define <vscale x 8 x i32> @load_widen_6i32(<vscale x 6 x i32>* %a) {
+define <vscale x 6 x i32> @load_widen_6i32(<vscale x 6 x i32>* %a) {
 ; CHECK-LABEL: load_widen_6i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
@@ -94,11 +91,10 @@
 ; CHECK-NEXT:    uzp1 z1.s, z1.s, z0.s
 ; CHECK-NEXT:    ret
   %load = load <vscale x 6 x i32>, <vscale x 6 x i32>* %a, align 1
-  %r = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv6i32(<vscale x 8 x i32> undef, <vscale x 6 x i32> %load, i64 0)
-  ret <vscale x 8 x i32> %r
+  ret <vscale x 6 x i32> %load
 }

-define <vscale x 8 x i32> @load_widen_7i32(<vscale x 7 x i32>* %a) {
+define <vscale x 7 x i32> @load_widen_7i32(<vscale x 7 x i32>* %a) {
 ; CHECK-LABEL: load_widen_7i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntw x9
@@ -114,8 +110,7 @@
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0, #1, mul vl]
 ; CHECK-NEXT:    ret
   %load = load <vscale x 7 x i32>, <vscale x 7 x i32>* %a, align 1
-  %r = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv7i32(<vscale x 8 x i32> undef, <vscale x 7 x i32> %load, i64 0)
-  ret <vscale x 8 x i32> %r
+  ret <vscale x 7 x i32> %load
 }

 define <vscale x 32 x i16> @load_split_32i16(<vscale x 32 x i16>* %a) {
@@ -218,7 +213,7 @@
   ret <vscale x 8 x i64> %load
 }

-define <vscale x 8 x i16> @masked_load_widen_6i16(<vscale x 6 x i16>* %a, <vscale x 8 x i1> %pg.wide) {
+define <vscale x 6 x i16> @masked_load_widen_6i16(<vscale x 6 x i16>* %a, <vscale x 6 x i1> %pg) {
 ; CHECK-LABEL: masked_load_widen_6i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cnth x8
@@ -235,30 +230,27 @@
 ; CHECK-NEXT:    and p0.b, p2/z, p1.b, p0.b
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    ret
-  %pg = call <vscale x 6 x i1> @llvm.experimental.vector.extract.nxv6i1.nxv8i1(<vscale x 8 x i1> %pg.wide, i64 0)
   %load = call <vscale x 6 x i16> @llvm.masked.load.nxv6i16(<vscale x 6 x i16> *%a, i32 1, <vscale x 6 x i1> %pg, <vscale x 6 x i16> undef)
-  %r = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.nxv6i16(<vscale x 8 x i16> undef, <vscale x 6 x i16> %load, i64 0)
-  ret <vscale x 8 x i16> %r
+  ret <vscale x 6 x i16> %load
 }

-define <vscale x 4 x i32> @masked_load_widen_1i32(<vscale x 1 x i32>* %a, <vscale x 4 x i1> %pg.wide) {
+define <vscale x 1 x i32> @masked_load_widen_1i32(<vscale x 1 x i32>* %a, <vscale x 1 x i1> %pg) {
 ; CHECK-LABEL: masked_load_widen_1i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntw x8
 ; CHECK-NEXT:    index z0.s, #0, #1
-; CHECK-NEXT:    mov z1.s, w8
 ; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    mov z1.s, w8
 ; CHECK-NEXT:    cmphi p2.s, p1/z, z1.s, z0.s
+; CHECK-NEXT:    uzp1 p0.s, p0.s, p0.s
 ; CHECK-NEXT:    and p0.b, p1/z, p2.b, p0.b
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    ret
-  %pg = call <vscale x 1 x i1> @llvm.experimental.vector.extract.nxv1i1.nxv4i1(<vscale x 4 x i1> %pg.wide, i64 0)
   %load = call <vscale x 1 x i32> @llvm.masked.load.nxv1i32(<vscale x 1 x i32> *%a, i32 1, <vscale x 1 x i1> %pg, <vscale x 1 x i32> undef)
-  %r = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv1i32(<vscale x 4 x i32> undef, <vscale x 1 x i32> %load, i64 0)
-  ret <vscale x 4 x i32> %r
+  ret <vscale x 1 x i32> %load
 }

-define <vscale x 4 x i32> @masked_load_widen_3i32(<vscale x 3 x i32>* %a, <vscale x 4 x i1> %pg.wide) {
+define <vscale x 3 x i32> @masked_load_widen_3i32(<vscale x 3 x i32>* %a, <vscale x 3 x i1> %pg) {
 ; CHECK-LABEL: masked_load_widen_3i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cntw x8
@@ -269,13 +261,11 @@
 ; CHECK-NEXT:    and p0.b, p1/z, p2.b, p0.b
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    ret
-  %pg = call <vscale x 3 x i1> @llvm.experimental.vector.extract.nxv3i1.nxv4i1(<vscale x 4 x i1> %pg.wide, i64 0)
   %load = call <vscale x 3 x i32> @llvm.masked.load.nxv3i32(<vscale x 3 x i32> *%a, i32 1, <vscale x 3 x i1> %pg, <vscale x 3 x i32> undef)
-  %r = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv3i32(<vscale x 4 x i32> undef, <vscale x 3 x i32> %load, i64 0)
-  ret <vscale x 4 x i32> %r
+  ret <vscale x 3 x i32> %load
 }

-define <vscale x 8 x i32> @masked_load_widen_6i32(<vscale x 6 x i32>* %a, <vscale x 8 x i1> %pg.wide) {
+define <vscale x 6 x i32> @masked_load_widen_6i32(<vscale x 6 x i32>* %a, <vscale x 6 x i1> %pg) {
 ; CHECK-LABEL: masked_load_widen_6i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cnth x8
@@ -296,13 +286,11 @@
 ; CHECK-NEXT:    ld1w { z0.s }, p2/z, [x0]
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0, #1, mul vl]
 ; CHECK-NEXT:    ret
-  %pg = call <vscale x 6 x i1> @llvm.experimental.vector.extract.nxv6i1.nxv8i1(<vscale x 8 x i1> %pg.wide, i64 0)
   %load = call <vscale x 6 x i32> @llvm.masked.load.nxv6i32(<vscale x 6 x i32> *%a, i32 1, <vscale x 6 x i1> %pg, <vscale x 6 x i32> undef)
-  %r = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv6i32(<vscale x 8 x i32> undef, <vscale x 6 x i32> %load, i64 0)
-  ret <vscale x 8 x i32> %r
+  ret <vscale x 6 x i32> %load
 }

-define <vscale x 8 x i32> @masked_load_widen_7i32(<vscale x 7 x i32>* %a, <vscale x 8 x i1> %pg.wide) {
+define <vscale x 7 x i32> @masked_load_widen_7i32(<vscale x 7 x i32>* %a, <vscale x 7 x i1> %pg) {
 ; CHECK-LABEL: masked_load_widen_7i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    cnth x8
@@ -323,10 +311,8 @@
 ; CHECK-NEXT:    ld1w { z0.s }, p2/z, [x0]
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0, #1, mul vl]
 ; CHECK-NEXT:    ret
-  %pg = call <vscale x 7 x i1> @llvm.experimental.vector.extract.nxv7i1.nxv8i1(<vscale x 8 x i1> %pg.wide, i64 0)
   %load = call <vscale x 7 x i32> @llvm.masked.load.nxv7i32(<vscale x 7 x i32> *%a, i32 1, <vscale x 7 x i1> %pg, <vscale x 7 x i32> undef)
-  %r = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv7i32(<vscale x 8 x i32> undef, <vscale x 7 x i32> %load, i64 0)
-  ret <vscale x 8 x i32> %r
+  ret <vscale x 7 x i32> %load
 }

 declare <vscale x 32 x i8> @llvm.masked.load.nxv32i8(<vscale x 32 x i8>*, i32, <vscale x 32 x i1>, <vscale x 32 x i8>)
@@ -339,12 +325,3 @@
 declare <vscale x 7 x i32> @llvm.masked.load.nxv7i32(<vscale x 7 x i32>*, i32, <vscale x 7 x i1>, <vscale x 7 x i32>)
 declare <vscale x 8 x i32> @llvm.masked.load.nxv8i32(<vscale x 8 x i32>*, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)
 declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64(<vscale x 8 x i64>*, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)
-declare <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.nxv6i16(<vscale x 8 x i16>, <vscale x 6 x i16>, i64)
-declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv1i32(<vscale x 4 x i32>, <vscale x 1 x i32>, i64)
-declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv3i32(<vscale x 4 x i32>, <vscale x 3 x i32>, i64)
-declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv6i32(<vscale x 8 x i32>, <vscale x 6 x i32>, i64)
-declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv7i32(<vscale x 8 x i32>, <vscale x 7 x i32>, i64)
-declare <vscale x 1 x i1> @llvm.experimental.vector.extract.nxv1i1.nxv4i1(<vscale x 4 x i1>, i64)
-declare <vscale x 3 x i1> @llvm.experimental.vector.extract.nxv3i1.nxv4i1(<vscale x 4 x i1>, i64)
-declare <vscale x 6 x i1> @llvm.experimental.vector.extract.nxv6i1.nxv8i1(<vscale x 8 x i1>, i64)
-declare <vscale x 7 x i1> @llvm.experimental.vector.extract.nxv7i1.nxv8i1(<vscale x 8 x i1>, i64)