diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22581,6 +22581,11 @@
     }
   }

+  // Simplify scalar inserts into an undef vector:
+  // insert_subvector undef, (splat X), N2 -> splat X
+  if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
+    return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
+
   // Canonicalize insert_subvector dag nodes.
   // Example:
   // (insert_subvector (insert_subvector A, Idx0), Idx1)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10092,10 +10092,9 @@
     // lowering code.
     if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
       // We can hande the zero case during isel.
-      if (ConstVal->isZero())
-        return Op;
-      if (ConstVal->isOne())
+      if (ConstVal->getZExtValue() & 0x1)
         return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
+      return Op;
     }
     // The general case of i1. There isn't any natural way to do this,
     // so we use some trickery with whilelo.
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+bf16 < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mattr=+sve -mattr=+bf16 < %s | FileCheck %s --check-prefixes=CHECK
+
+target triple = "aarch64-unknown-linux-gnu"

 define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
 ; CHECK-LABEL: insert_v2i64_nxv2i64:
@@ -571,6 +573,42 @@
   ret <vscale x 16 x i1> %v0
 }

+; Test constant predicate insert into undef
+define <vscale x 2 x i1> @insert_nxv2i1_v8i1_const_true_into_undef() vscale_range(4,4) {
+; CHECK-LABEL: insert_nxv2i1_v8i1_const_true_into_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 2 x i1> @llvm.experimental.vector.insert.nxv2i1.v8i1(<vscale x 2 x i1> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i64 0)
+  ret <vscale x 2 x i1> %v0
+}
+
+define <vscale x 4 x i1> @insert_nxv4i1_v16i1_const_true_into_undef() vscale_range(4,4) {
+; CHECK-LABEL: insert_nxv4i1_v16i1_const_true_into_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 4 x i1> @llvm.experimental.vector.insert.nxv4i1.v16i1(<vscale x 4 x i1> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i64 0)
+  ret <vscale x 4 x i1> %v0
+}
+
+define <vscale x 8 x i1> @insert_nxv8i1_v32i1_const_true_into_undef() vscale_range(4,4) {
+; CHECK-LABEL: insert_nxv8i1_v32i1_const_true_into_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 8 x i1> @llvm.experimental.vector.insert.nxv8i1.v32i1(<vscale x 8 x i1> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i64 0)
+  ret <vscale x 8 x i1> %v0
+}
+
+define <vscale x 16 x i1> @insert_nxv16i1_v64i1_const_true_into_undef() vscale_range(4,4) {
+; CHECK-LABEL: insert_nxv16i1_v64i1_const_true_into_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.v64i1(<vscale x 16 x i1> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i64 0)
+  ret <vscale x 16 x i1> %v0
+}
 declare <vscale x 3 x i32> @llvm.experimental.vector.insert.nxv3i32.nxv2i32(<vscale x 3 x i32>, <vscale x 2 x i32>, i64)
 declare <vscale x 3 x float> @llvm.experimental.vector.insert.nxv3f32.nxv2f32(<vscale x 3 x float>, <vscale x 2 x float>, i64)

@@ -583,5 +621,9 @@

 declare <vscale x 4 x bfloat> @llvm.experimental.vector.insert.nxv4bf16.v4bf16(<vscale x 4 x bfloat>, <4 x bfloat>, i64)
 declare <vscale x 2 x bfloat> @llvm.experimental.vector.insert.nxv2bf16.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i64)
+declare <vscale x 2 x i1> @llvm.experimental.vector.insert.nxv2i1.v8i1(<vscale x 2 x i1>, <8 x i1>, i64)
+declare <vscale x 4 x i1> @llvm.experimental.vector.insert.nxv4i1.v16i1(<vscale x 4 x i1>, <16 x i1>, i64)
+declare <vscale x 8 x i1> @llvm.experimental.vector.insert.nxv8i1.v32i1(<vscale x 8 x i1>, <32 x i1>, i64)
 declare <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv4i1(<vscale x 16 x i1>, <vscale x 4 x i1>, i64)
 declare <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv8i1(<vscale x 16 x i1>, <vscale x 8 x i1>, i64)
+declare <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.v64i1(<vscale x 16 x i1>, <64 x i1>, i64)
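
For anyone wanting to reproduce the codegen change locally, below is a reduced standalone version of the new tests (not part of the patch; the file name repro.ll and the function name @all_true_pd are invented for illustration). Assuming llc is built with SVE support from a tree of this vintage, where the insert intrinsic still carries the "experimental" prefix, the whole insert should lower to a single ptrue:

; repro.ll -- run with: llc -mattr=+sve repro.ll -o -
target triple = "aarch64-unknown-linux-gnu"

; Together, the two code changes above fold the insert of the all-true
; subvector into undef down to a constant splat, which the AArch64 lowering
; then emits as "ptrue p0.d" (an all-active predicate) instead of going
; through the generic whilelo-based i1 splat path.
define <vscale x 2 x i1> @all_true_pd() vscale_range(4,4) {
  %m = call <vscale x 2 x i1> @llvm.experimental.vector.insert.nxv2i1.v8i1(<vscale x 2 x i1> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i64 0)
  ret <vscale x 2 x i1> %m
}

declare <vscale x 2 x i1> @llvm.experimental.vector.insert.nxv2i1.v8i1(<vscale x 2 x i1>, <8 x i1>, i64)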