diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5052,7 +5052,10 @@
   // If the opcode is a target-specific ISD node, there's nothing we can
   // do here and the operand rules may not line up with the below, so
   // bail early.
-  if (Opcode >= ISD::BUILTIN_OP_END)
+  // We can't create a scalar CONCAT_VECTORS so skip it. It will break
+  // for concats involving SPLAT_VECTOR. Concats of BUILD_VECTORS are handled by
+  // foldCONCAT_VECTORS in getNode before this is called.
+  if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)
     return SDValue();
 
   // For now, the array Ops should only contain two values.
@@ -5092,27 +5095,20 @@
   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
     return FoldSymbolOffset(Opcode, VT, GA, N1);
 
-  // TODO: All the folds below are performed lane-by-lane and assume a fixed
-  // vector width, however we should be able to do constant folds involving
-  // splat vector nodes too.
-  if (VT.isScalableVector())
-    return SDValue();
-
   // For fixed width vectors, extract each constant element and fold them
   // individually. Either input may be an undef value.
-  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
-  if (!BV1 && !N1->isUndef())
+  bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR ||
+                   N1->getOpcode() == ISD::SPLAT_VECTOR;
+  if (!IsBVOrSV1 && !N1->isUndef())
     return SDValue();
-  auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
-  if (!BV2 && !N2->isUndef())
+  bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR ||
+                   N2->getOpcode() == ISD::SPLAT_VECTOR;
+  if (!IsBVOrSV2 && !N2->isUndef())
     return SDValue();
 
   // If both operands are undef, that's handled the same way as scalars.
-  if (!BV1 && !BV2)
+  if (!IsBVOrSV1 && !IsBVOrSV2)
     return SDValue();
 
-  assert((!BV1 || !BV2 || BV1->getNumOperands() == BV2->getNumOperands()) &&
-         "Vector binop with different number of elements in operands?");
-
   EVT SVT = VT.getScalarType();
   EVT LegalSVT = SVT;
   if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
@@ -5120,19 +5116,40 @@
     if (LegalSVT.bitsLT(SVT))
       return SDValue();
   }
+
   SmallVector<SDValue, 4> Outputs;
-  unsigned NumOps = BV1 ? BV1->getNumOperands() : BV2->getNumOperands();
+  // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need
+  // one iteration for that.
+  // TODO: Should we use one iteration for fixed length vectors when only
+  // SPLAT_VECTOR or UNDEF are present?
+  unsigned NumOps = VT.isScalableVector() ? 1 : VT.getVectorNumElements();
   for (unsigned I = 0; I != NumOps; ++I) {
-    SDValue V1 = BV1 ? BV1->getOperand(I) : getUNDEF(SVT);
-    SDValue V2 = BV2 ? BV2->getOperand(I) : getUNDEF(SVT);
+    // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need
+    // to use operand 0 of the SPLAT_VECTOR for each fixed element.
+    SDValue V1;
+    if (N1->getOpcode() == ISD::BUILD_VECTOR)
+      V1 = N1->getOperand(I);
+    else if (N1->getOpcode() == ISD::SPLAT_VECTOR)
+      V1 = N1->getOperand(0);
+    else
+      V1 = getUNDEF(SVT);
+
+    SDValue V2;
+    if (N2->getOpcode() == ISD::BUILD_VECTOR)
+      V2 = N2->getOperand(I);
+    else if (N2->getOpcode() == ISD::SPLAT_VECTOR)
+      V2 = N2->getOperand(0);
+    else
+      V2 = getUNDEF(SVT);
+
     if (SVT.isInteger()) {
-      if (V1->getValueType(0).bitsGT(SVT))
+      if (V1.getValueType().bitsGT(SVT))
         V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
-      if (V2->getValueType(0).bitsGT(SVT))
+      if (V2.getValueType().bitsGT(SVT))
         V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
     }
 
-    if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+    if (V1.getValueType() != SVT || V2.getValueType() != SVT)
       return SDValue();
 
     // Fold one vector element.
@@ -5147,10 +5164,17 @@
     Outputs.push_back(ScalarResult);
   }
 
+  if (VT.isScalableVector()) {
+    assert(Outputs.size() == 1 && "Vector size mismatch!");
+    return getSplatVector(VT, SDLoc(), Outputs[0]);
+  }
+
   assert(VT.getVectorNumElements() == Outputs.size() &&
          "Vector size mismatch!");
 
   // Build a big vector out of the scalar elements we generated.
+  // TODO: If the inputs were fixed length SPLAT_VECTORs, should this return
+  // a SPLAT_VECTOR?
   return getBuildVector(VT, SDLoc(), Outputs);
 }
diff --git a/llvm/test/CodeGen/AArch64/pr49781.ll b/llvm/test/CodeGen/AArch64/pr49781.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr49781.ll
@@ -0,0 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
+
+define <vscale x 2 x i64> @foo(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: foo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub z0.d, z0.d, #2 // =0x2
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %b = sub <vscale x 2 x i64> %a, %idx
+  %c = sub <vscale x 2 x i64> %b, %idx
+  ret <vscale x 2 x i64> %c
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv32.ll
@@ -38,13 +38,11 @@
 }
 
 ; Test constant adds to see if we can optimize them away for scalable vectors.
-; FIXME: We can't.
 define <vscale x 1 x i8> @vadd_ii_nxv1i8_1() {
 ; CHECK-LABEL: vadd_ii_nxv1i8_1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
-; CHECK-NEXT:    vmv.v.i v25, 2
-; CHECK-NEXT:    vadd.vi v8, v25, 3
+; CHECK-NEXT:    vmv.v.i v8, 5
 ; CHECK-NEXT:    ret
   %heada = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
   %splata = shufflevector <vscale x 1 x i8> %heada, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode-rv64.ll
@@ -37,6 +37,21 @@
   ret <vscale x 1 x i8> %vc
 }
 
+; Test constant adds to see if we can optimize them away for scalable vectors.
+define <vscale x 1 x i8> @vadd_ii_nxv1i8_1() {
+; CHECK-LABEL: vadd_ii_nxv1i8_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.v.i v8, 5
+; CHECK-NEXT:    ret
+  %heada = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
+  %splata = shufflevector <vscale x 1 x i8> %heada, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %headb = insertelement <vscale x 1 x i8> undef, i8 3, i32 0
+  %splatb = shufflevector <vscale x 1 x i8> %headb, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = add <vscale x 1 x i8> %splata, %splatb
+  ret <vscale x 1 x i8> %vc
+}
+
 define <vscale x 2 x i8> @vadd_vx_nxv2i8(<vscale x 2 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vadd_vx_nxv2i8:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll
@@ -11,13 +11,9 @@
 ; CHECK-NEXT:    vmul.vx v25, v8, a0
 ; CHECK-NEXT:    addi a0, zero, 42
 ; CHECK-NEXT:    vadd.vx v25, v25, a0
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vrsub.vi v27, v26, 0
-; CHECK-NEXT:    vand.vi v27, v27, 7
-; CHECK-NEXT:    vsll.vv v27, v25, v27
-; CHECK-NEXT:    vand.vi v26, v26, 7
-; CHECK-NEXT:    vsrl.vv v25, v25, v26
-; CHECK-NEXT:    vor.vv v25, v25, v27
+; CHECK-NEXT:    vsll.vi v26, v25, 7
+; CHECK-NEXT:    vsrl.vi v25, v25, 1
+; CHECK-NEXT:    vor.vv v25, v25, v26
 ; CHECK-NEXT:    vmsleu.vx v0, v25, a0
 ; CHECK-NEXT:    ret
   %head_six = insertelement <vscale x 4 x i8> undef, i8 6, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv32.ll
@@ -36,6 +36,21 @@
   ret <vscale x 1 x i8> %vc
 }
 
+; Test constant subs to see if we can optimize them away for scalable vectors.
+define <vscale x 1 x i8> @vsub_ii_nxv1i8_1() {
+; CHECK-LABEL: vsub_ii_nxv1i8_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.v.i v8, -1
+; CHECK-NEXT:    ret
+  %heada = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
+  %splata = shufflevector <vscale x 1 x i8> %heada, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %headb = insertelement <vscale x 1 x i8> undef, i8 3, i32 0
+  %splatb = shufflevector <vscale x 1 x i8> %headb, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = sub <vscale x 1 x i8> %splata, %splatb
+  ret <vscale x 1 x i8> %vc
+}
+
 define <vscale x 2 x i8> @vsub_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vsub_vv_nxv2i8:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode-rv64.ll
@@ -36,6 +36,21 @@
   ret <vscale x 1 x i8> %vc
 }
 
+; Test constant subs to see if we can optimize them away for scalable vectors.
+define <vscale x 1 x i8> @vsub_ii_nxv1i8_1() {
+; CHECK-LABEL: vsub_ii_nxv1i8_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.v.i v8, -1
+; CHECK-NEXT:    ret
+  %heada = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
+  %splata = shufflevector <vscale x 1 x i8> %heada, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %headb = insertelement <vscale x 1 x i8> undef, i8 3, i32 0
+  %splatb = shufflevector <vscale x 1 x i8> %headb, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = sub <vscale x 1 x i8> %splata, %splatb
+  ret <vscale x 1 x i8> %vc
+}
+
 define <vscale x 2 x i8> @vsub_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
 ; CHECK-LABEL: vsub_vv_nxv2i8:
 ; CHECK:       # %bb.0: