diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -22694,6 +22694,11 @@ N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT) return N1.getOperand(0); + // Simplify splat inserts into an undef vector: + // insert_subvector undef, (splat X), N2 -> splat X + if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR) + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0)); + // If we are inserting a bitcast value into an undef, with the same // number of elements, just use the bitcast input of the extract. // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 -> diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -573,6 +573,42 @@ ret %v0 } +; Test constant-true predicate inserts into undef fold to a single ptrue +define @insert_nxv2i1_v8i1_const_true_into_undef() vscale_range(4,8) { +; CHECK-LABEL: insert_nxv2i1_v8i1_const_true_into_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ret + %v0 = call @llvm.experimental.vector.insert.nxv2i1.v8i1 ( undef, <8 x i1> , i64 0) + ret %v0 +} + +define @insert_nxv4i1_v16i1_const_true_into_undef() vscale_range(4,8) { +; CHECK-LABEL: insert_nxv4i1_v16i1_const_true_into_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ret + %v0 = call @llvm.experimental.vector.insert.nxv4i1.v16i1 ( undef, <16 x i1> , i64 0) + ret %v0 +} + +define @insert_nxv8i1_v32i1_const_true_into_undef() vscale_range(4,8) { +; CHECK-LABEL: insert_nxv8i1_v32i1_const_true_into_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ret + %v0 = call @llvm.experimental.vector.insert.nxv8i1.v32i1 ( undef, <32 x i1> , i64 0) + ret %v0 +} + +define @insert_nxv16i1_v64i1_const_true_into_undef() 
vscale_range(4,8) { +; CHECK-LABEL: insert_nxv16i1_v64i1_const_true_into_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: ret + %v0 = call @llvm.experimental.vector.insert.nxv16i1.v64i1 ( undef, <64 x i1> , i64 0) + ret %v0 +} declare @llvm.experimental.vector.insert.nxv3i32.nxv2i32(, , i64) declare @llvm.experimental.vector.insert.nxv3f32.nxv2f32(, , i64) @@ -585,5 +621,9 @@ declare @llvm.experimental.vector.insert.nxv4bf16.v4bf16(, <4 x bfloat>, i64) declare @llvm.experimental.vector.insert.nxv2bf16.nxv2bf16(, , i64) +declare @llvm.experimental.vector.insert.nxv2i1.v8i1(, <8 x i1>, i64) +declare @llvm.experimental.vector.insert.nxv4i1.v16i1(, <16 x i1>, i64) +declare @llvm.experimental.vector.insert.nxv8i1.v32i1(, <32 x i1>, i64) declare @llvm.experimental.vector.insert.nx16i1.nxv4i1(, , i64) declare @llvm.experimental.vector.insert.nx16i1.nxv8i1(, , i64) +declare @llvm.experimental.vector.insert.nxv16i1.v64i1(, <64 x i1>, i64)