Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3598,7 +3598,9 @@
   /// the original and new nodes in Old and New. Otherwise, analyze the
   /// expression and return a mask of KnownOne and KnownZero bits for the
   /// expression (used to simplify the caller). The KnownZero/One bits may only
-  /// be accurate for those bits in the Demanded masks.
+  /// be accurate for those bits in the Demanded masks. For scalable vectors,
+  /// the DemandedElts mask must be getVectorMinNumElements in size and all
+  /// lanes must be demanded.
   /// \p AssumeSingleUse When this parameter is true, this function will
   /// attempt to simplify \p Op even if there are multiple uses.
   /// Callers are responsible for correctly updating the DAG based on the
Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -633,17 +633,8 @@
                                           bool AssumeSingleUse) const {
   EVT VT = Op.getValueType();
 
-  // TODO: We can probably do more work on calculating the known bits and
-  // simplifying the operations for scalable vectors, but for now we just
-  // bail out.
-  if (VT.isScalableVector()) {
-    // Pretend we don't know anything for now.
-    Known = KnownBits(DemandedBits.getBitWidth());
-    return false;
-  }
-
   APInt DemandedElts = VT.isVector()
-                           ? APInt::getAllOnes(VT.getVectorNumElements())
+                           ? APInt::getAllOnes(VT.getVectorMinNumElements())
                            : APInt(1, 1);
   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO,
                               Depth, AssumeSingleUse);
@@ -663,6 +654,9 @@
   if (Op.isUndef())
     return SDValue();
 
+  assert((!Op.getValueType().isScalableVector() || DemandedElts.isAllOnes()) &&
+         "Expected all demanded lanes from scalable vectors");
+
   // Not demanding any bits/elts from Op.
   if (DemandedBits == 0 || DemandedElts == 0)
     return DAG.getUNDEF(Op.getValueType());
@@ -673,6 +667,9 @@
   KnownBits LHSKnown, RHSKnown;
   switch (Op.getOpcode()) {
   case ISD::BITCAST: {
+    if (Op.getValueType().isScalableVector())
+      return SDValue();
+
     SDValue Src = peekThroughBitcasts(Op.getOperand(0));
     EVT SrcVT = Src.getValueType();
     EVT DstVT = Op.getValueType();
@@ -831,6 +828,9 @@
     break;
   }
   case ISD::INSERT_VECTOR_ELT: {
+    if (Op.getValueType().isScalableVector())
+      return SDValue();
+
     // If we don't demand the inserted element, return the base vector.
     SDValue Vec = Op.getOperand(0);
     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -841,6 +841,9 @@
     break;
   }
   case ISD::INSERT_SUBVECTOR: {
+    if (Op.getValueType().isScalableVector())
+      return SDValue();
+
     SDValue Vec = Op.getOperand(0);
     SDValue Sub = Op.getOperand(1);
     uint64_t Idx = Op.getConstantOperandVal(2);
@@ -896,7 +899,7 @@
                                        unsigned Depth) const {
   EVT VT = Op.getValueType();
   APInt DemandedElts = VT.isVector()
-                           ? APInt::getAllOnes(VT.getVectorNumElements())
+                           ? APInt::getAllOnes(VT.getVectorMinNumElements())
                            : APInt(1, 1);
   return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                          Depth);
@@ -1055,17 +1058,14 @@
   // Don't know anything.
   Known = KnownBits(BitWidth);
 
-  // TODO: We can probably do more work on calculating the known bits and
-  // simplifying the operations for scalable vectors, but for now we just
-  // bail out.
-  if (Op.getValueType().isScalableVector())
-    return false;
-
   bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
   unsigned NumElts = OriginalDemandedElts.getBitWidth();
   assert((!Op.getValueType().isVector() ||
-          NumElts == Op.getValueType().getVectorNumElements()) &&
+          NumElts == Op.getValueType().getVectorMinNumElements()) &&
          "Unexpected vector size");
+  assert((!Op.getValueType().isScalableVector() ||
+          OriginalDemandedElts.isAllOnes()) &&
+         "Expected all demanded elts from scalable vectors");
 
   APInt DemandedBits = OriginalDemandedBits;
   APInt DemandedElts = OriginalDemandedElts;
@@ -1152,6 +1152,9 @@
     break;
   }
   case ISD::INSERT_VECTOR_ELT: {
+    if (VT.isScalableVector())
+      return false;
+
     SDValue Vec = Op.getOperand(0);
     SDValue Scl = Op.getOperand(1);
     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -1188,6 +1191,9 @@
     return false;
   }
   case ISD::INSERT_SUBVECTOR: {
+    if (VT.isScalableVector())
+      return false;
+
     // Demand any elements from the subvector and the remainder from the src
     // it's inserted into.
     SDValue Src = Op.getOperand(0);
@@ -1231,6 +1237,9 @@
     break;
   }
   case ISD::EXTRACT_SUBVECTOR: {
+    if (VT.isScalableVector())
+      return false;
+
     // Offset the demanded elts by the subvector index.
     SDValue Src = Op.getOperand(0);
     if (Src.getValueType().isScalableVector())
       break;
@@ -1256,6 +1265,9 @@
     break;
   }
   case ISD::CONCAT_VECTORS: {
+    if (VT.isScalableVector())
+      return false;
+
     Known.Zero.setAllBits();
     Known.One.setAllBits();
     EVT SubVT = Op.getOperand(0).getValueType();
@@ -2129,7 +2141,7 @@
     SDValue Src = Op.getOperand(0);
     EVT SrcVT = Src.getValueType();
     unsigned InBits = SrcVT.getScalarSizeInBits();
-    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorMinNumElements() : 1;
     bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
 
     // If none of the top bits are demanded, convert this into an any_extend.
@@ -2166,7 +2178,7 @@
     SDValue Src = Op.getOperand(0);
     EVT SrcVT = Src.getValueType();
     unsigned InBits = SrcVT.getScalarSizeInBits();
-    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorMinNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
 
     // If none of the top bits are demanded, convert this into an any_extend.
@@ -2218,7 +2230,7 @@
     SDValue Src = Op.getOperand(0);
     EVT SrcVT = Src.getValueType();
     unsigned InBits = SrcVT.getScalarSizeInBits();
-    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorMinNumElements() : 1;
     bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
 
     // If we only need the bottom element then we can just bitcast.
@@ -2356,6 +2368,9 @@
     break;
   }
   case ISD::BITCAST: {
+    if (VT.isScalableVector())
+      return false;
+
     SDValue Src = Op.getOperand(0);
     EVT SrcVT = Src.getValueType();
     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
Index: llvm/test/CodeGen/AArch64/active_lane_mask.ll
===================================================================
--- llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -100,7 +100,6 @@
 ; CHECK-NEXT:    mov z1.h, w1
 ; CHECK-NEXT:    umin z0.h, z0.h, #255
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
-; CHECK-NEXT:    and z0.h, z0.h, #0xff
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    cmphi p0.h, p0/z, z1.h, z0.h
 ; CHECK-NEXT:    ret
@@ -119,7 +118,6 @@
 ; CHECK-NEXT:    mov z1.s, w1
 ; CHECK-NEXT:    umin z0.s, z0.s, #255
 ; CHECK-NEXT:    and z1.s, z1.s, #0xff
-; CHECK-NEXT:    and z0.s, z0.s, #0xff
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    cmphi p0.s, p0/z, z1.s, z0.s
 ; CHECK-NEXT:    ret
@@ -135,12 +133,11 @@
 ; CHECK-NEXT:    mov z1.d, x0
 ; CHECK-NEXT:    and z0.d, z0.d, #0xff
 ; CHECK-NEXT:    and z1.d, z1.d, #0xff
-; CHECK-NEXT:    add z0.d, z0.d, z1.d
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    mov z2.d, x1
-; CHECK-NEXT:    umin z0.d, z0.d, #255
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
 ; CHECK-NEXT:    and z2.d, z2.d, #0xff
-; CHECK-NEXT:    and z0.d, z0.d, #0xff
+; CHECK-NEXT:    umin z0.d, z0.d, #255
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    cmphi p0.d, p0/z, z2.d, z0.d
 ; CHECK-NEXT:    ret
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
@@ -40,7 +40,6 @@
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p1.h, p1/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p1, z0.h, z1.h
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
@@ -56,15 +55,14 @@
 ; VBITS_GE_256-LABEL: select_v32f16:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    mov x8, #16
-; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
+; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p1.h
 ; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
 ; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    mov z4.h, w9
-; VBITS_GE_256-NEXT:    and z4.h, z4.h, #0x1
 ; VBITS_GE_256-NEXT:    cmpne p1.h, p1/z, z4.h, #0
 ; VBITS_GE_256-NEXT:    sel z1.h, p1, z1.h, z3.h
 ; VBITS_GE_256-NEXT:    sel z0.h, p1, z0.h, z2.h
@@ -80,7 +78,6 @@
 ; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
 ; VBITS_GE_512-NEXT:    ptrue p1.h
 ; VBITS_GE_512-NEXT:    mov z2.h, w8
-; VBITS_GE_512-NEXT:    and z2.h, z2.h, #0x1
 ; VBITS_GE_512-NEXT:    cmpne p1.h, p1/z, z2.h, #0
 ; VBITS_GE_512-NEXT:    sel z0.h, p1, z0.h, z1.h
 ; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
@@ -101,7 +98,6 @@
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p1.h, p1/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p1, z0.h, z1.h
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
@@ -122,7 +118,6 @@
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p1.h, p1/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p1, z0.h, z1.h
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
@@ -169,7 +164,6 @@
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
@@ -185,15 +179,14 @@
 ; VBITS_GE_256-LABEL: select_v16f32:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    mov x8, #8
-; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
+; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p1.s
 ; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    mov z4.s, w9
-; VBITS_GE_256-NEXT:    and z4.s, z4.s, #0x1
 ; VBITS_GE_256-NEXT:    cmpne p1.s, p1/z, z4.s, #0
 ; VBITS_GE_256-NEXT:    sel z1.s, p1, z1.s, z3.s
 ; VBITS_GE_256-NEXT:    sel z0.s, p1, z0.s, z2.s
@@ -209,7 +202,6 @@
 ; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; VBITS_GE_512-NEXT:    ptrue p1.s
 ; VBITS_GE_512-NEXT:    mov z2.s, w8
-; VBITS_GE_512-NEXT:    and z2.s, z2.s, #0x1
 ; VBITS_GE_512-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; VBITS_GE_512-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
@@ -230,7 +222,6 @@
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
@@ -251,7 +242,6 @@
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
@@ -298,7 +288,6 @@
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -314,15 +303,14 @@
 ; VBITS_GE_256-LABEL: select_v8f64:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    mov x8, #4
-; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
+; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p1.d
 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    mov z4.d, x9
-; VBITS_GE_256-NEXT:    and z4.d, z4.d, #0x1
 ; VBITS_GE_256-NEXT:    cmpne p1.d, p1/z, z4.d, #0
 ; VBITS_GE_256-NEXT:    sel z1.d, p1, z1.d, z3.d
 ; VBITS_GE_256-NEXT:    sel z0.d, p1, z0.d, z2.d
@@ -338,7 +326,6 @@
 ; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_512-NEXT:    ptrue p1.d
 ; VBITS_GE_512-NEXT:    mov z2.d, x8
-; VBITS_GE_512-NEXT:    and z2.d, z2.d, #0x1
 ; VBITS_GE_512-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; VBITS_GE_512-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
@@ -359,7 +346,6 @@
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -380,7 +366,6 @@
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
@@ -40,7 +40,6 @@
 ; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    mov z2.b, w8
-; CHECK-NEXT:    and z2.b, z2.b, #0x1
 ; CHECK-NEXT:    cmpne p1.b, p1/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p1, z0.b, z1.b
 ; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
@@ -56,15 +55,14 @@
 ; VBITS_GE_256-LABEL: select_v64i8:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    mov w8, #32
-; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
+; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p1.b
 ; VBITS_GE_256-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
 ; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    ld1b { z2.b }, p0/z, [x1, x8]
 ; VBITS_GE_256-NEXT:    ld1b { z3.b }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    mov z4.b, w9
-; VBITS_GE_256-NEXT:    and z4.b, z4.b, #0x1
 ; VBITS_GE_256-NEXT:    cmpne p1.b, p1/z, z4.b, #0
 ; VBITS_GE_256-NEXT:    sel z1.b, p1, z1.b, z3.b
 ; VBITS_GE_256-NEXT:    sel z0.b, p1, z0.b, z2.b
@@ -80,7 +78,6 @@
 ; VBITS_GE_512-NEXT:    ld1b { z1.b }, p0/z, [x1]
 ; VBITS_GE_512-NEXT:    ptrue p1.b
 ; VBITS_GE_512-NEXT:    mov z2.b, w8
-; VBITS_GE_512-NEXT:    and z2.b, z2.b, #0x1
 ; VBITS_GE_512-NEXT:    cmpne p1.b, p1/z, z2.b, #0
 ; VBITS_GE_512-NEXT:    sel z0.b, p1, z0.b, z1.b
 ; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
@@ -101,7 +98,6 @@
 ; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    mov z2.b, w8
-; CHECK-NEXT:    and z2.b, z2.b, #0x1
 ; CHECK-NEXT:    cmpne p1.b, p1/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p1, z0.b, z1.b
 ; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
@@ -122,7 +118,6 @@
 ; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    mov z2.b, w8
-; CHECK-NEXT:    and z2.b, z2.b, #0x1
 ; CHECK-NEXT:    cmpne p1.b, p1/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p1, z0.b, z1.b
 ; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
@@ -169,7 +164,6 @@
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p1.h, p1/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p1, z0.h, z1.h
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
@@ -185,15 +179,14 @@
 ; VBITS_GE_256-LABEL: select_v32i16:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    mov x8, #16
-; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
+; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p1.h
 ; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
 ; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    mov z4.h, w9
-; VBITS_GE_256-NEXT:    and z4.h, z4.h, #0x1
 ; VBITS_GE_256-NEXT:    cmpne p1.h, p1/z, z4.h, #0
 ; VBITS_GE_256-NEXT:    sel z1.h, p1, z1.h, z3.h
 ; VBITS_GE_256-NEXT:    sel z0.h, p1, z0.h, z2.h
@@ -209,7 +202,6 @@
 ; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
 ; VBITS_GE_512-NEXT:    ptrue p1.h
 ; VBITS_GE_512-NEXT:    mov z2.h, w8
-; VBITS_GE_512-NEXT:    and z2.h, z2.h, #0x1
 ; VBITS_GE_512-NEXT:    cmpne p1.h, p1/z, z2.h, #0
 ; VBITS_GE_512-NEXT:    sel z0.h, p1, z0.h, z1.h
 ; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
@@ -230,7 +222,6 @@
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p1.h, p1/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p1, z0.h, z1.h
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
@@ -251,7 +242,6 @@
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p1.h, p1/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p1, z0.h, z1.h
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
@@ -298,7 +288,6 @@
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
@@ -314,15 +303,14 @@
 ; VBITS_GE_256-LABEL: select_v16i32:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    mov x8, #8
-; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
+; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p1.s
 ; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    mov z4.s, w9
-; VBITS_GE_256-NEXT:    and z4.s, z4.s, #0x1
 ; VBITS_GE_256-NEXT:    cmpne p1.s, p1/z, z4.s, #0
 ; VBITS_GE_256-NEXT:    sel z1.s, p1, z1.s, z3.s
 ; VBITS_GE_256-NEXT:    sel z0.s, p1, z0.s, z2.s
@@ -338,7 +326,6 @@
 ; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; VBITS_GE_512-NEXT:    ptrue p1.s
 ; VBITS_GE_512-NEXT:    mov z2.s, w8
-; VBITS_GE_512-NEXT:    and z2.s, z2.s, #0x1
 ; VBITS_GE_512-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; VBITS_GE_512-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
@@ -359,7 +346,6 @@
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
@@ -380,7 +366,6 @@
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p1.s, p1/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
@@ -427,7 +412,6 @@
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -443,15 +427,14 @@
 ; VBITS_GE_256-LABEL: select_v8i64:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    mov x8, #4
-; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
+; VBITS_GE_256-NEXT:    and w9, w2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p1.d
 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT:    mov z4.d, x9
-; VBITS_GE_256-NEXT:    and z4.d, z4.d, #0x1
 ; VBITS_GE_256-NEXT:    cmpne p1.d, p1/z, z4.d, #0
 ; VBITS_GE_256-NEXT:    sel z1.d, p1, z1.d, z3.d
 ; VBITS_GE_256-NEXT:    sel z0.d, p1, z0.d, z2.d
@@ -467,7 +450,6 @@
 ; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_512-NEXT:    ptrue p1.d
 ; VBITS_GE_512-NEXT:    mov z2.d, x8
-; VBITS_GE_512-NEXT:    and z2.d, z2.d, #0x1
 ; VBITS_GE_512-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; VBITS_GE_512-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
@@ -488,7 +470,6 @@
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -509,7 +490,6 @@
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
Index: llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
===================================================================
--- llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
+++ llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
@@ -227,8 +227,8 @@
   KnownBits Known;
   APInt DemandedBits = APInt(8, 0xFF);
   TargetLowering::TargetLoweringOpt TLO(*DAG, false, false);
-  EXPECT_FALSE(TL.SimplifyDemandedBits(Op, DemandedBits, Known, TLO));
-  EXPECT_EQ(Known.Zero, APInt(8, 0));
+  EXPECT_TRUE(TL.SimplifyDemandedBits(Op, DemandedBits, Known, TLO));
+  EXPECT_EQ(Known.Zero, APInt(8, 0xAA));
 }
 
 // Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits.
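
Note: the new asserts encode a calling convention rather than new folds. Callers that reach SimplifyDemandedBits with a scalable type must demand every lane of a mask that is getVectorMinNumElements() wide. A minimal sketch of that contract, mirroring the updated wrapper above; it assumes an SDValue Op, a TargetLowering TLI, and a TargetLoweringOpt TLO already in scope and is not part of the patch:

  // Sketch only: how a caller is expected to form DemandedElts after this
  // change. For scalable vectors, getVectorNumElements() would assert, so
  // the minimum element count is used and every lane is demanded.
  EVT VT = Op.getValueType();                       // e.g. nxv8i16
  APInt DemandedBits = APInt::getAllOnes(VT.getScalarSizeInBits());
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorMinNumElements())
                           : APInt(1, 1);
  KnownBits Known;
  // Passing a partial DemandedElts mask for a scalable type would trip the
  // new "all demanded lanes" asserts.
  bool Simplified =
      TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);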