Index: llvm/include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- llvm/include/llvm/CodeGen/SelectionDAG.h
+++ llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1897,9 +1897,11 @@
   /// Determine which bits of Op are known to be either zero or one and return
   /// them in Known. The DemandedElts argument allows us to only collect the
-  /// known bits that are shared by the requested vector elements.
-  /// Targets can implement the computeKnownBitsForTargetNode method in the
-  /// TargetLowering class to allow target nodes to be understood.
+  /// known bits that are shared by the requested vector elements. For scalable
+  /// vectors the DemandedElts must be getVectorMinNumElements in size and all
+  /// lanes must be demanded. Targets can implement the
+  /// computeKnownBitsForTargetNode method in the TargetLowering class to allow
+  /// target nodes to be understood.
   KnownBits computeKnownBits(SDValue Op, const APInt &DemandedElts,
                              unsigned Depth = 0) const;
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2889,15 +2889,8 @@
 KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
   EVT VT = Op.getValueType();
-  // TOOD: Until we have a plan for how to represent demanded elements for
-  // scalable vectors, we can just bail out for now.
-  if (Op.getValueType().isScalableVector()) {
-    unsigned BitWidth = Op.getScalarValueSizeInBits();
-    return KnownBits(BitWidth);
-  }
-
   APInt DemandedElts = VT.isVector()
-                           ? APInt::getAllOnes(VT.getVectorNumElements())
+                           ? APInt::getAllOnes(VT.getVectorMinNumElements())
                            : APInt(1, 1);
   return computeKnownBits(Op, DemandedElts, Depth);
 }
@@ -2911,11 +2904,6 @@
   KnownBits Known(BitWidth);   // Don't know anything.
-  // TOOD: Until we have a plan for how to represent demanded elements for
-  // scalable vectors, we can just bail out for now.
-  if (Op.getValueType().isScalableVector())
-    return Known;
-
   if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
     // We know all of the bits for a constant!
     return KnownBits::makeConstant(C->getAPIntValue());
@@ -2931,8 +2919,10 @@
   KnownBits Known2;
   unsigned NumElts = DemandedElts.getBitWidth();
   assert((!Op.getValueType().isVector() ||
-          NumElts == Op.getValueType().getVectorNumElements()) &&
+          NumElts == Op.getValueType().getVectorMinNumElements()) &&
          "Unexpected vector size");
+  assert((!Op.getValueType().isScalableVector() || DemandedElts.isAllOnes()) &&
+         "Expected all demanded lanes from scalable vectors");
 
   if (!DemandedElts)
     return Known;  // No demanded elts, better to assume we don't know anything.
@@ -2964,6 +2954,22 @@
         break;
     }
     break;
+  case ISD::SPLAT_VECTOR: {
+    SDValue SrcOp = Op.getOperand(0);
+    Known = computeKnownBits(SrcOp, Depth + 1);
+    if (SrcOp.getValueSizeInBits() != BitWidth) {
+      assert(SrcOp.getValueSizeInBits() > BitWidth &&
+             "Expected SPLAT_VECTOR implicit truncation");
+      Known = Known.trunc(BitWidth);
+    }
+    break;
+  }
+  case ISD::STEP_VECTOR: {
+    const APInt &Step = Op.getConstantOperandAPInt(0);
+    if (Step.isPowerOf2())
+      Known.Zero.setLowBits(Step.logBase2());
+    break;
+  }
   case ISD::VECTOR_SHUFFLE: {
     // Collect the known bits that are shared by every vector element referenced
     // by the shuffle.
@@ -3007,6 +3013,9 @@
     break;
   }
   case ISD::CONCAT_VECTORS: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     // Split DemandedElts and test each of the demanded subvectors.
     Known.Zero.setAllBits(); Known.One.setAllBits();
     EVT SubVectorVT = Op.getOperand(0).getValueType();
@@ -3027,6 +3036,9 @@
     break;
   }
   case ISD::INSERT_SUBVECTOR: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     // Demand any elements from the subvector and the remainder from the src its
     // inserted into.
     SDValue Src = Op.getOperand(0);
@@ -3051,6 +3063,9 @@
     break;
   }
   case ISD::EXTRACT_SUBVECTOR: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     // Offset the demanded elts by the subvector index.
     SDValue Src = Op.getOperand(0);
     // Bail until we can represent demanded elements for scalable vectors.
@@ -3063,6 +3078,9 @@
     break;
   }
   case ISD::SCALAR_TO_VECTOR: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     // We know about scalar_to_vector as much as we know about it source,
     // which becomes the first element of otherwise unknown vector.
     if (DemandedElts != 1)
@@ -3076,6 +3094,9 @@
     break;
   }
   case ISD::BITCAST: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     SDValue N0 = Op.getOperand(0);
     EVT SubVT = N0.getValueType();
     unsigned SubBitWidth = SubVT.getScalarSizeInBits();
@@ -3428,6 +3449,9 @@
     break;
   }
   case ISD::ZERO_EXTEND_VECTOR_INREG: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     EVT InVT = Op.getOperand(0).getValueType();
     APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
     Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
@@ -3440,6 +3464,9 @@
     break;
   }
   case ISD::SIGN_EXTEND_VECTOR_INREG: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     EVT InVT = Op.getOperand(0).getValueType();
     APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
     Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
@@ -3456,6 +3483,9 @@
     break;
   }
   case ISD::ANY_EXTEND_VECTOR_INREG: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     EVT InVT = Op.getOperand(0).getValueType();
     APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
     Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
@@ -3608,6 +3638,9 @@
     break;
   }
   case ISD::INSERT_VECTOR_ELT: {
+    if (Op.getValueType().isScalableVector())
+      return Known;
+
     // If we know the element index, split the demand between the
     // source vector and the inserted element, otherwise assume we need
     // the original demanded vector elements and the value.
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5207,13 +5207,13 @@
             (!cast<Instruction>(NAME # "_D") (i64 0), simm5_64b:$imm5b)>;
 
   // add(step_vector(step), dup(X)) -> index(X, step).
-  def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5b)), (nxv16i8 (splat_vector(simm5_8b:$imm5)))),
+  def : Pat<(add_and_or_is_add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5b)), (nxv16i8 (splat_vector(simm5_8b:$imm5)))),
             (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, (!cast<SDNodeXForm>("trunc_imm") $imm5b))>;
-  def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5b)), (nxv8i16 (splat_vector(simm5_16b:$imm5)))),
+  def : Pat<(add_and_or_is_add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5b)), (nxv8i16 (splat_vector(simm5_16b:$imm5)))),
             (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, (!cast<SDNodeXForm>("trunc_imm") $imm5b))>;
-  def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5b)), (nxv4i32 (splat_vector(simm5_32b:$imm5)))),
+  def : Pat<(add_and_or_is_add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5b)), (nxv4i32 (splat_vector(simm5_32b:$imm5)))),
             (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>;
-  def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5b)), (nxv2i64 (splat_vector(simm5_64b:$imm5)))),
+  def : Pat<(add_and_or_is_add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5b)), (nxv2i64 (splat_vector(simm5_64b:$imm5)))),
             (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>;
 }
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -574,7 +574,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z1.d, #0, #1
 ; CHECK-NEXT:    and z1.d, z1.d, #0x1
-; CHECK-NEXT:    add z1.d, z1.d, #8 // =0x8
+; CHECK-NEXT:    orr z1.d, z1.d, #0x8
 ; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 4)
Index: llvm/test/CodeGen/AArch64/sve-knownbits.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-knownbits.ll
+++ llvm/test/CodeGen/AArch64/sve-knownbits.ll
@@ -4,8 +4,7 @@
 define <vscale x 8 x i16> @test_knownzero(<vscale x 8 x i16> %x) {
 ; CHECK-LABEL: test_knownzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z0.h, z0.h, #8
-; CHECK-NEXT:    and z0.h, z0.h, #0x8
+; CHECK-NEXT:    mov z0.h, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a1 = shl <vscale x 8 x i16> %x, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 8, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
   %a2 = and <vscale x 8 x i16> %a1, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 8, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
Index: llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
+++ llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
@@ -9,15 +9,10 @@
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    and z1.d, z1.d, #0xff
 ; CHECK-NEXT:    and z0.d, z0.d, #0xff
-; CHECK-NEXT:    movprfx z2, z0
-; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    lsr z1.d, z2.d, #8
-; CHECK-NEXT:    cmpne p1.d, p0/z, z0.d, #0
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    lsr z1.d, z0.d, #8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 2 x i8>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y)
   %b = extractvalue { <vscale x 2 x i8>, <vscale x 2 x i1> } %a, 0
@@ -34,15 +29,10 @@
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    and z1.s, z1.s, #0xff
 ; CHECK-NEXT:    and z0.s, z0.s, #0xff
-; CHECK-NEXT:    movprfx z2, z0
-; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    lsr z1.s, z2.s, #8
-; CHECK-NEXT:    cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    lsr z1.s, z0.s, #8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT:    mov z2.s, p0/m, #0 // =0x0
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 4 x i8>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y)
   %b = extractvalue { <vscale x 4 x i8>, <vscale x 4 x i1> } %a, 0
@@ -59,15 +49,10 @@
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
-; CHECK-NEXT:    movprfx z2, z0
-; CHECK-NEXT:    mul z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    lsr z1.h, z2.h, #8
-; CHECK-NEXT:    cmpne p1.h, p0/z, z0.h, #0
+; CHECK-NEXT:    mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    lsr z1.h, z0.h, #8
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z1.h, #0
-; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT:    mov z2.h, p0/m, #0 // =0x0
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z0.h, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 8 x i8>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y)
   %b = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i1> } %a, 0
@@ -164,15 +149,10 @@
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    and z1.d, z1.d, #0xffff
 ; CHECK-NEXT:    and z0.d, z0.d, #0xffff
-; CHECK-NEXT:    movprfx z2, z0
-; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    lsr z1.d, z2.d, #16
-; CHECK-NEXT:    cmpne p1.d, p0/z, z0.d, #0
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    lsr z1.d, z0.d, #16
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 2 x i16>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y)
   %b = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i1> } %a, 0
@@ -189,15 +169,10 @@
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    and z1.s, z1.s, #0xffff
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
-; CHECK-NEXT:    movprfx z2, z0
-; CHECK-NEXT:    mul z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    lsr z1.s, z2.s, #16
-; CHECK-NEXT:    cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    lsr z1.s, z0.s, #16
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT:    mov z2.s, p0/m, #0 // =0x0
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z0.s, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 4 x i16>, <vscale x 4 x i1> } @llvm.umul.with.overflow.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %y)
   %b = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i1> } %a, 0
@@ -294,15 +269,10 @@
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
 ; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
-; CHECK-NEXT:    movprfx z2, z0
-; CHECK-NEXT:    mul z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    lsr z1.d, z2.d, #32
-; CHECK-NEXT:    cmpne p1.d, p0/z, z0.d, #0
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    lsr z1.d, z0.d, #32
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT:    sel p0.b, p0, p0.b, p1.b
-; CHECK-NEXT:    mov z2.d, p0/m, #0 // =0x0
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    mov z0.d, p0/m, #0 // =0x0
 ; CHECK-NEXT:    ret
   %a = call { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.umul.with.overflow.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
   %b = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i1> } %a, 0