diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -10961,6 +10961,16 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs, bool AllowTruncation) { + EVT VT = N.getValueType(); + APInt DemandedElts = VT.isFixedLengthVector() + ? APInt::getAllOnes(VT.getVectorMinNumElements()) + : APInt(1, 1); + return isConstOrConstSplat(N, DemandedElts, AllowUndefs, AllowTruncation); +} + +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, + bool AllowUndefs, + bool AllowTruncation) { if (ConstantSDNode *CN = dyn_cast(N)) return CN; @@ -10976,36 +10986,13 @@ } } - if (BuildVectorSDNode *BV = dyn_cast(N)) { - BitVector UndefElements; - ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); - - // BuildVectors can truncate their operands. Ignore that case here unless - // AllowTruncation is set. - if (CN && (UndefElements.none() || AllowUndefs)) { - EVT CVT = CN->getValueType(0); - EVT NSVT = N.getValueType().getScalarType(); - assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); - if (AllowTruncation || (CVT == NSVT)) - return CN; - } - } - - return nullptr; -} - -ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, - bool AllowUndefs, - bool AllowTruncation) { - if (ConstantSDNode *CN = dyn_cast(N)) - return CN; - if (BuildVectorSDNode *BV = dyn_cast(N)) { BitVector UndefElements; ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements); // BuildVectors can truncate their operands. Ignore that case here unless // AllowTruncation is set. + // TODO: Look into whether we should allow UndefElements in non-DemandedElts if (CN && (UndefElements.none() || AllowUndefs)) { EVT CVT = CN->getValueType(0); EVT NSVT = N.getValueType().getScalarType(); @@ -11019,21 +11006,11 @@ } ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { - if (ConstantFPSDNode *CN = dyn_cast(N)) - return CN; - - if (BuildVectorSDNode *BV = dyn_cast(N)) { - BitVector UndefElements; - ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); - if (CN && (UndefElements.none() || AllowUndefs)) - return CN; - } - - if (N.getOpcode() == ISD::SPLAT_VECTOR) - if (ConstantFPSDNode *CN = dyn_cast(N.getOperand(0))) - return CN; - - return nullptr; + EVT VT = N.getValueType(); + APInt DemandedElts = VT.isFixedLengthVector() + ? APInt::getAllOnes(VT.getVectorMinNumElements()) + : APInt(1, 1); + return isConstOrConstSplatFP(N, DemandedElts, AllowUndefs); } ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, @@ -11046,10 +11023,15 @@ BitVector UndefElements; ConstantFPSDNode *CN = BV->getConstantFPSplatNode(DemandedElts, &UndefElements); + // TODO: Look into whether we should allow UndefElements in non-DemandedElts if (CN && (UndefElements.none() || AllowUndefs)) return CN; } + if (N.getOpcode() == ISD::SPLAT_VECTOR) + if (ConstantFPSDNode *CN = dyn_cast(N.getOperand(0))) + return CN; + return nullptr; } diff --git a/llvm/test/CodeGen/AArch64/sve-knownbits.ll b/llvm/test/CodeGen/AArch64/sve-knownbits.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-knownbits.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s + +define @test_knownzero( %x) { +; CHECK-LABEL: test_knownzero: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: ret + %a1 = shl %x, shufflevector ( insertelement ( poison, i16 8, i32 0), poison, zeroinitializer) + %a2 = and %a1, shufflevector ( insertelement ( poison, i16 8, i32 0), poison, zeroinitializer) + ret %a2 +} + +define @asrlsr( %va) { +; CHECK-LABEL: asrlsr: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z1.d, z1.d, #15 +; CHECK-NEXT: lsr z0.d, z0.d, #15 +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %head = insertelement poison, i32 15, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + %vb = zext %splat to + %x = ashr %va, %vb + %y = trunc %x to + ret %y +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll @@ -34,7 +34,7 @@ ; CHECK-LABEL: vnsra_wi_i32_nxv1i32_sext: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vnsra.wi v8, v8, 15 +; CHECK-NEXT: vnsrl.wi v8, v8, 15 ; CHECK-NEXT: ret %head = insertelement poison, i32 15, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -76,7 +76,7 @@ ; CHECK-LABEL: vnsra_wi_i32_nxv2i32_sext: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vnsra.wi v10, v8, 15 +; CHECK-NEXT: vnsrl.wi v10, v8, 15 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %head = insertelement poison, i32 15, i32 0 @@ -119,7 +119,7 @@ ; CHECK-LABEL: vnsra_wi_i32_nxv4i32_sext: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vnsra.wi v12, v8, 15 +; CHECK-NEXT: vnsrl.wi v12, v8, 15 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %head = insertelement poison, i32 15, i32 0 @@ -162,7 +162,7 @@ ; CHECK-LABEL: vnsra_wi_i32_nxv8i32_sext: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vnsra.wi v16, v8, 15 +; CHECK-NEXT: vnsrl.wi v16, v8, 15 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %head = insertelement poison, i32 15, i32 0 @@ -203,7 +203,7 @@ ; CHECK-LABEL: vnsra_wi_i32_nxv1i32_zext: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vnsra.wi v8, v8, 15 +; CHECK-NEXT: vnsrl.wi v8, v8, 15 ; CHECK-NEXT: ret %head = insertelement poison, i32 15, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -245,7 +245,7 @@ ; CHECK-LABEL: vnsra_wi_i32_nxv2i32_zext: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vnsra.wi v10, v8, 15 +; CHECK-NEXT: vnsrl.wi v10, v8, 15 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret %head = insertelement poison, i32 15, i32 0 @@ -288,7 +288,7 @@ ; CHECK-LABEL: vnsra_wi_i32_nxv4i32_zext: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vnsra.wi v12, v8, 15 +; CHECK-NEXT: vnsrl.wi v12, v8, 15 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %head = insertelement poison, i32 15, i32 0 @@ -331,7 +331,7 @@ ; CHECK-LABEL: vnsra_wi_i32_nxv8i32_zext: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vnsra.wi v16, v8, 15 +; CHECK-NEXT: vnsrl.wi v16, v8, 15 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %head = insertelement poison, i32 15, i32 0