Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23328,11 +23328,6 @@
       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
     return N1.getOperand(0);
 
-  // Simplify scalar inserts into an undef vector:
-  // insert_subvector undef, (splat X), N2 -> splat X
-  if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
-    return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
-
   // If we are inserting a bitcast value into an undef, with the same
   // number of elements, just use the bitcast input of the extract.
   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -809,11 +809,11 @@
         continue;
       }
 
+      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+
       // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
       // it before type legalization for i64 vectors on RV32. It will then be
       // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
-      // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
-      // improvements first.
       if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
         setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
         setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
Index: llvm/test/CodeGen/AArch64/sve-insert-vector.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -675,7 +675,9 @@
 define <vscale x 2 x i1> @insert_nxv2i1_v8i1_const_true_into_undef() vscale_range(4,8) {
 ; CHECK-LABEL: insert_nxv2i1_v8i1_const_true_into_undef:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #1 // =0x1
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    ret
   %v0 = call <vscale x 2 x i1> @llvm.vector.insert.nxv2i1.v8i1 (<vscale x 2 x i1> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i64 0)
   ret <vscale x 2 x i1> %v0
@@ -684,7 +686,9 @@
 define <vscale x 4 x i1> @insert_nxv4i1_v16i1_const_true_into_undef() vscale_range(4,8) {
 ; CHECK-LABEL: insert_nxv4i1_v16i1_const_true_into_undef:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, #1 // =0x1
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ret
   %v0 = call <vscale x 4 x i1> @llvm.vector.insert.nxv4i1.v16i1 (<vscale x 4 x i1> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i64 0)
   ret <vscale x 4 x i1> %v0
@@ -693,7 +697,9 @@
 define <vscale x 8 x i1> @insert_nxv8i1_v32i1_const_true_into_undef() vscale_range(4,8) {
 ; CHECK-LABEL: insert_nxv8i1_v32i1_const_true_into_undef:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, #1 // =0x1
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ret
   %v0 = call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.v32i1 (<vscale x 8 x i1> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i64 0)
   ret <vscale x 8 x i1> %v0
@@ -702,7 +708,9 @@
 define <vscale x 16 x i1> @insert_nxv16i1_v64i1_const_true_into_undef() vscale_range(4,8) {
 ; CHECK-LABEL: insert_nxv16i1_v64i1_const_true_into_undef:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.b, #1 // =0x1
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    ret
   %v0 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v64i1 (<vscale x 16 x i1> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i64 0)
   ret <vscale x 16 x i1> %v0
Index: llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -1062,7 +1062,7 @@
 ; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    fmv.h.x ft0, zero
 ; CHECK-NEXT:    fneg.h ft0, ft0
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli a2, zero, e16, mf4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v9, ft0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v9, a1
@@ -1086,7 +1086,7 @@
 ; CHECK-NEXT:    add a1, a0, a0
 ; CHECK-NEXT:    fmv.h.x ft0, zero
 ; CHECK-NEXT:    fneg.h ft0, ft0
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli a2, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, ft0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
 ; CHECK-NEXT:    vslideup.vx v9, v10, a0
@@ -1110,7 +1110,7 @@
 ; CHECK-NEXT:    add a1, a0, a0
 ; CHECK-NEXT:    fmv.h.x ft0, zero
 ; CHECK-NEXT:    fneg.h ft0, ft0
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli a2, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, ft0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
 ; CHECK-NEXT:    vslideup.vx v10, v12, a0
@@ -1158,7 +1158,7 @@
 ; CHECK-NEXT:    add a0, a1, a0
 ; CHECK-NEXT:    fmv.h.x ft0, zero
 ; CHECK-NEXT:    fneg.h ft0, ft0
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli a2, zero, e16, mf4, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v9, ft0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
 ; CHECK-NEXT:    vslideup.vx v8, v9, a1
@@ -1180,7 +1180,7 @@
 ; CHECK-NEXT:    add a1, a0, a0
 ; CHECK-NEXT:    fmv.h.x ft0, zero
 ; CHECK-NEXT:    fneg.h ft0, ft0
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli a2, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v10, ft0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
 ; CHECK-NEXT:    vslideup.vx v9, v10, a0
@@ -1204,7 +1204,7 @@
 ; CHECK-NEXT:    flh ft0, %lo(.LCPI73_0)(a1)
 ; CHECK-NEXT:    srli a0, a0, 2
 ; CHECK-NEXT:    add a1, a0, a0
-; CHECK-NEXT:    vsetvli a2, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli a2, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfmv.v.f v12, ft0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
 ; CHECK-NEXT:    vslideup.vx v10, v12, a0
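
Note (illustrative, not part of the patch): the DAGCombiner fold deleted above rewrote
insert_subvector undef, (splat X), N2 -> splat X, turning a splat of a narrow subvector
inserted into undef into one full-width SPLAT_VECTOR before type legalization. A minimal
IR sketch of the affected shape follows; the function and value names are hypothetical,
and it assumes a scalable-vector target such as RVV (where SPLAT_VECTOR is now Custom)
or SVE.

  ; A splat subvector inserted into an undef wider vector. With the generic
  ; fold removed, the target lowers the insert itself and can, for example,
  ; pick a fractional LMUL on RISC-V, as the vsetvli m1 -> mf2/mf4 changes
  ; in vreductions-fp-sdnode.ll show.
  declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv2i32(<vscale x 4 x i32>, <vscale x 2 x i32>, i64)

  define <vscale x 4 x i32> @insert_splat_into_undef(i32 %x) {
    %head = insertelement <vscale x 2 x i32> poison, i32 %x, i64 0
    %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
    %v = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv2i32(<vscale x 4 x i32> undef, <vscale x 2 x i32> %splat, i64 0)
    ret <vscale x 4 x i32> %v
  }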