Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2895,15 +2895,8 @@
 KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
   EVT VT = Op.getValueType();
 
-  // TOOD: Until we have a plan for how to represent demanded elements for
-  // scalable vectors, we can just bail out for now.
-  if (Op.getValueType().isScalableVector()) {
-    unsigned BitWidth = Op.getScalarValueSizeInBits();
-    return KnownBits(BitWidth);
-  }
-
   APInt DemandedElts = VT.isVector()
-                           ? APInt::getAllOnes(VT.getVectorNumElements())
+                           ? APInt::getAllOnes(VT.getVectorMinNumElements())
                            : APInt(1, 1);
   return computeKnownBits(Op, DemandedElts, Depth);
 }
@@ -2917,11 +2910,6 @@
   KnownBits Known(BitWidth);   // Don't know anything.
 
-  // TOOD: Until we have a plan for how to represent demanded elements for
-  // scalable vectors, we can just bail out for now.
-  if (Op.getValueType().isScalableVector())
-    return Known;
-
   if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
     // We know all of the bits for a constant!
     return KnownBits::makeConstant(C->getAPIntValue());
   }
@@ -2937,7 +2925,7 @@
   KnownBits Known2;
   unsigned NumElts = DemandedElts.getBitWidth();
   assert((!Op.getValueType().isVector() ||
-          NumElts == Op.getValueType().getVectorNumElements()) &&
+          NumElts == Op.getValueType().getVectorMinNumElements()) &&
          "Unexpected vector size");
 
   if (!DemandedElts)
@@ -2970,6 +2958,22 @@
       break;
     }
     break;
+  case ISD::SPLAT_VECTOR: {
+    SDValue SrcOp = Op.getOperand(0);
+    Known = computeKnownBits(SrcOp, Depth + 1);
+    if (SrcOp.getValueSizeInBits() != BitWidth) {
+      assert(SrcOp.getValueSizeInBits() > BitWidth &&
+             "Expected SPLAT_VECTOR implicit truncation");
+      Known = Known.trunc(BitWidth);
+    }
+    break;
+  }
+  case ISD::STEP_VECTOR: {
+    const APInt &Step = Op.getConstantOperandAPInt(0);
+    if (Step.isPowerOf2())
+      Known.Zero.setLowBits(Step.logBase2());
+    break;
+  }
   case ISD::VECTOR_SHUFFLE: {
     // Collect the known bits that are shared by every vector element referenced
     // by the shuffle.
@@ -3013,6 +3017,9 @@
     break;
   }
   case ISD::CONCAT_VECTORS: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     // Split DemandedElts and test each of the demanded subvectors.
     Known.Zero.setAllBits(); Known.One.setAllBits();
     EVT SubVectorVT = Op.getOperand(0).getValueType();
@@ -3033,6 +3040,9 @@
     break;
   }
   case ISD::INSERT_SUBVECTOR: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     // Demand any elements from the subvector and the remainder from the src its
     // inserted into.
     SDValue Src = Op.getOperand(0);
@@ -3057,6 +3067,9 @@
     break;
   }
   case ISD::EXTRACT_SUBVECTOR: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     // Offset the demanded elts by the subvector index.
     SDValue Src = Op.getOperand(0);
     // Bail until we can represent demanded elements for scalable vectors.
@@ -3069,6 +3082,9 @@
     break;
   }
   case ISD::SCALAR_TO_VECTOR: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     // We know about scalar_to_vector as much as we know about it source,
     // which becomes the first element of otherwise unknown vector.
     if (DemandedElts != 1)
@@ -3082,6 +3098,9 @@
     break;
   }
   case ISD::BITCAST: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     SDValue N0 = Op.getOperand(0);
     EVT SubVT = N0.getValueType();
     unsigned SubBitWidth = SubVT.getScalarSizeInBits();
@@ -3435,7 +3454,7 @@
   }
   case ISD::ZERO_EXTEND_VECTOR_INREG: {
     EVT InVT = Op.getOperand(0).getValueType();
-    APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
+    APInt InDemandedElts = DemandedElts.zext(InVT.getVectorMinNumElements());
     Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
     Known = Known.zext(BitWidth);
     break;
@@ -3447,7 +3466,7 @@
   }
   case ISD::SIGN_EXTEND_VECTOR_INREG: {
     EVT InVT = Op.getOperand(0).getValueType();
-    APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
+    APInt InDemandedElts = DemandedElts.zext(InVT.getVectorMinNumElements());
     Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
     // If the sign bit is known to be zero or one, then sext will extend
     // it to the top bits, else it will just zext.
@@ -3463,7 +3482,7 @@
   }
   case ISD::ANY_EXTEND_VECTOR_INREG: {
     EVT InVT = Op.getOperand(0).getValueType();
-    APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
+    APInt InDemandedElts = DemandedElts.zext(InVT.getVectorMinNumElements());
     Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
     Known = Known.anyext(BitWidth);
     break;
@@ -3614,6 +3633,9 @@
     break;
   }
   case ISD::INSERT_VECTOR_ELT: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     // If we know the element index, split the demand between the
     // source vector and the inserted element, otherwise assume we need
     // the original demanded vector elements and the value.
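Note: with the scalable-vector bail-outs removed, known bits now propagate through ISD::SPLAT_VECTOR and ISD::STEP_VECTOR. A minimal gtest-style sketch of what the SPLAT_VECTOR case enables, written against the AArch64SelectionDAGTest fixture touched at the end of this patch (the test name and the chosen type are illustrative, not part of the change):

  // Hypothetical test: known bits of a splatted constant on a scalable type.
  TEST_F(AArch64SelectionDAGTest, computeKnownBits_SPLAT_VECTOR_Sketch) {
    SDLoc Loc;
    auto Int32VT = EVT::getIntegerVT(Context, 32);
    // <vscale x 4 x i32>
    auto VecVT = EVT::getVectorVT(Context, Int32VT, 4, /*IsScalable=*/true);
    // splat_vector (i32 15): every lane is 0x0000000F.
    SDValue Splat = DAG->getNode(ISD::SPLAT_VECTOR, Loc, VecVT,
                                 DAG->getConstant(15, Loc, Int32VT));
    KnownBits Known = DAG->computeKnownBits(Splat);
    EXPECT_EQ(Known.One, APInt(32, 15));          // low four bits known one
    EXPECT_EQ(Known.Zero, APInt(32, 0xFFFFFFF0)); // upper bits known zero
  }

The splat operand is allowed to be wider than the element type, which is why the new SPLAT_VECTOR case truncates the computed bits when the widths differ.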
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -45,6 +45,21 @@
 def FalseLanesZero : FalseLanesEnum<1>;
 def FalseLanesUndef : FalseLanesEnum<2>;
 
+// Match the add node, and also treat an 'or' node as an 'add' if the or'ed
+// operands have no common bits.
+def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
+    [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
+  if (N->getOpcode() == ISD::ADD)
+    return true;
+  return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
+}]> {
+  let GISelPredicateCode = [{
+    // Only handle G_ADD for now. FIXME: build capability to compute whether
+    // operands of G_OR have common bits set or not.
+    return MI.getOpcode() == TargetOpcode::G_ADD;
+  }];
+}
+
 // AArch64 Instruction Format
 class AArch64Inst<Format f, string cstr> : Instruction {
   field bits<32> Inst; // Instruction encoding.
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6473,21 +6473,6 @@
                                            VectorIndexS:$idx)),
           (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
 
-// Match add node and also treat an 'or' node is as an 'add' if the or'ed operands
-// have no common bits.
-def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
-    [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
-  if (N->getOpcode() == ISD::ADD)
-    return true;
-  return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
-}]> {
-  let GISelPredicateCode = [{
-    // Only handle G_ADD for now. FIXME. build capability to compute whether
-    // operands of G_OR have common bits set or not.
-    return MI.getOpcode() == TargetOpcode::G_ADD;
-  }];
-}
-
 //----------------------------------------------------------------------------
 // AdvSIMD scalar shift instructions
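Note: moving add_and_or_is_add into AArch64InstrFormats.td makes it visible to the SVE patterns updated below. The property it relies on is that when no bit position can be set in both operands, the addition produces no carries, so a + b equals a | b; once the improved known bits prove the operands disjoint, DAGCombine is free to rewrite such an add into an or, and the INDEX patterns still need to match. A small standalone illustration in plain C++ (the concrete values mirror the sve-intrinsics-perm-select.ll update below and are not taken from the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    // (index & 0x1) can only have bit 0 set; the splatted constant 8 only has
    // bit 3 set, so the two values never share a set bit.
    uint64_t MaskedIndex = 0x1;
    uint64_t SplatImm = 0x8;
    assert((MaskedIndex & SplatImm) == 0);                      // no common bits
    assert(MaskedIndex + SplatImm == (MaskedIndex | SplatImm)); // add == or
    return 0;
  }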
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5222,13 +5222,13 @@
             (!cast<Instruction>(NAME # "_D") (i64 0), simm5_64b:$imm5b)>;
 
   // add(step_vector(step), dup(X)) -> index(X, step).
-  def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
+  def : Pat<(add_and_or_is_add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
             (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, (!cast<SDNodeXForm>("trunc_imm") $imm5b))>;
-  def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
+  def : Pat<(add_and_or_is_add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
             (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, (!cast<SDNodeXForm>("trunc_imm") $imm5b))>;
-  def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
+  def : Pat<(add_and_or_is_add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
             (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>;
-  def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+  def : Pat<(add_and_or_is_add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
             (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>;
 }
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -574,7 +574,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z1.d, #0, #1
 ; CHECK-NEXT:    and z1.d, z1.d, #0x1
-; CHECK-NEXT:    add z1.d, z1.d, #8 // =0x8
+; CHECK-NEXT:    orr z1.d, z1.d, #0x8
 ; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 4)
Index: llvm/test/CodeGen/AArch64/sve-knownbits.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-knownbits.ll
+++ llvm/test/CodeGen/AArch64/sve-knownbits.ll
@@ -4,8 +4,7 @@
 define <vscale x 8 x i16> @test_knownzero(<vscale x 8 x i16> %x) {
 ; CHECK-LABEL: test_knownzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z0.h, z0.h, #8
-; CHECK-NEXT:    and z0.h, z0.h, #0x8
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a1 = shl <vscale x 8 x i16> %x, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 8, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
   %a2 = and <vscale x 8 x i16> %a1, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 8, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
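Note: the sve-knownbits.ll update above falls straight out of the SelectionDAG change: the shift left by 8 clears the low eight bits of every i16 lane, so the following and with 8 is provably zero once known bits are computed for the scalable type. A rough sketch of the same reasoning expressed with the KnownBits helper class (the standalone function and its asserts are illustrative only, not part of the patch):

  #include <cassert>
  #include "llvm/ADT/APInt.h"
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  // Model one i16 lane of the test: unknown input, shifted left by 8, then
  // masked with 8 (bit 3).
  void knownZeroAfterShl() {
    KnownBits Lane(16);                                    // nothing known
    KnownBits Amt = KnownBits::makeConstant(APInt(16, 8)); // shift amount 8
    KnownBits Shifted = KnownBits::shl(Lane, Amt);
    assert(Shifted.countMinTrailingZeros() >= 8);          // low 8 bits zero
    APInt Mask(16, 8);                                     // the and-mask
    assert(Shifted.Zero.intersects(Mask));                 // bit 3 known zero, so the and folds to 0
  }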
Index: llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
+++ llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
@@ -9,15 +9,10 @@
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    and z1.d, z1.d, #0xff
 ; CHECK-NEXT:    and z0.d, z0.d, #0xff
-; CHECK-NEXT:    movprfx z2, z0
-; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    lsr z1.d, z2.d, #8
-; CHECK-NEXT:    cmpne p1.d, p0/z, z0.d, #0
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    lsr z1.d, z0.d, #8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 2 x i8>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y)
   %b = extractvalue { <vscale x 2 x i8>, <vscale x 2 x i1> } %a, 0
@@ -34,15 +29,10 @@
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    and z1.s, z1.s, #0xff
 ; CHECK-NEXT:    and z0.s, z0.s, #0xff
-; CHECK-NEXT:    movprfx z2, z0
-; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    lsr z1.s, z2.s, #8
-; CHECK-NEXT:    cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    lsr z1.s, z0.s, #8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT:    mov z2.s, p0/m, #0 // =0x0
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 4 x i8>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y)
   %b = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i1> } %a, 0
@@ -59,15 +49,10 @@
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
-; CHECK-NEXT:    movprfx z2, z0
-; CHECK-NEXT:    mul z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    lsr z1.h, z2.h, #8
-; CHECK-NEXT:    cmpne p1.h, p0/z, z0.h, #0
+; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    lsr z1.h, z0.h, #8
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z1.h, #0
-; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT:    mov z2.h, p0/m, #0 // =0x0
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z0.h, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y)
   %b = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 0
@@ -164,15 +149,10 @@
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    and z1.d, z1.d, #0xffff
 ; CHECK-NEXT:    and z0.d, z0.d, #0xffff
-; CHECK-NEXT:    movprfx z2, z0
-; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    lsr z1.d, z2.d, #16
-; CHECK-NEXT:    cmpne p1.d, p0/z, z0.d, #0
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    lsr z1.d, z0.d, #16
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 2 x i16>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y)
   %b = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i1> } %a, 0
@@ -189,15 +169,10 @@
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    and z1.s, z1.s, #0xffff
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
-; CHECK-NEXT:    movprfx z2, z0
-; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    lsr z1.s, z2.s, #16
-; CHECK-NEXT:    cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    lsr z1.s, z0.s, #16
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT:    mov z2.s, p0/m, #0 // =0x0
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 4 x i16>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y)
   %b = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i1> } %a, 0
@@ -294,15 +269,10 @@
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
 ; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
-; CHECK-NEXT:    movprfx z2, z0
-; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    lsr z1.d, z2.d, #32
-; CHECK-NEXT:    cmpne p1.d, p0/z, z0.d, #0
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    lsr z1.d, z0.d, #32
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
   %b = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i1> } %a, 0
Index: llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
===================================================================
--- llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
+++ llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
@@ -108,10 +108,7 @@
   auto DemandedElts = APInt(2, 3);
   KnownBits Known = DAG->computeKnownBits(Op, DemandedElts);
 
-  // We don't know anything for SVE at the moment.
-  EXPECT_EQ(Known.Zero, APInt(16, 0u));
-  EXPECT_EQ(Known.One, APInt(16, 0u));
-  EXPECT_FALSE(Known.isZero());
+  EXPECT_TRUE(Known.isZero());
 }
 
 TEST_F(AArch64SelectionDAGTest, computeKnownBits_EXTRACT_SUBVECTOR) {
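Note: a companion gtest-style sketch of the new ISD::STEP_VECTOR handling could look like the following. It assumes SelectionDAG::getStepVector(SDLoc, EVT, APInt) is available in this tree; the test name is illustrative and not part of the patch:

  // Hypothetical test: a power-of-two step leaves the low bits known zero.
  TEST_F(AArch64SelectionDAGTest, computeKnownBits_STEP_VECTOR_Sketch) {
    SDLoc Loc;
    auto Int64VT = EVT::getIntegerVT(Context, 64);
    // <vscale x 2 x i64> whose elements are 0, 4, 8, ...
    auto VecVT = EVT::getVectorVT(Context, Int64VT, 2, /*IsScalable=*/true);
    SDValue Step = DAG->getStepVector(Loc, VecVT, APInt(64, 4));
    KnownBits Known = DAG->computeKnownBits(Step);
    // Step 4 is a power of two, so every element is a multiple of four.
    EXPECT_GE(Known.countMinTrailingZeros(), 2u);
  }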