Index: include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- include/llvm/CodeGen/SelectionDAG.h
+++ include/llvm/CodeGen/SelectionDAG.h
@@ -1380,15 +1380,20 @@
   /// every vector element.
   /// Targets can implement the computeKnownBitsForTargetNode method in the
   /// TargetLowering class to allow target nodes to be understood.
-  void computeKnownBits(SDValue Op, KnownBits &Known, unsigned Depth = 0) const;
+  void
+  computeKnownBits(SDValue Op, KnownBits &Known,
+                   SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads = nullptr,
+                   unsigned Depth = 0) const;
 
   /// Determine which bits of Op are known to be either zero or one and return
   /// them in Known. The DemandedElts argument allows us to only collect the
   /// known bits that are shared by the requested vector elements.
   /// Targets can implement the computeKnownBitsForTargetNode method in the
   /// TargetLowering class to allow target nodes to be understood.
-  void computeKnownBits(SDValue Op, KnownBits &Known, const APInt &DemandedElts,
-                        unsigned Depth = 0) const;
+  void
+  computeKnownBits(SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+                   SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads = nullptr,
+                   unsigned Depth = 0) const;
 
   /// Used to represent the possible overflow behavior of an operation.
   /// Never: the operation cannot overflow.
Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -2741,11 +2741,10 @@
   /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
   /// argument allows us to only collect the known bits that are shared by the
   /// requested vector elements.
-  virtual void computeKnownBitsForTargetNode(const SDValue Op,
-                                             KnownBits &Known,
-                                             const APInt &DemandedElts,
-                                             const SelectionDAG &DAG,
-                                             unsigned Depth = 0) const;
+  virtual void computeKnownBitsForTargetNode(
+      const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+      const SelectionDAG &DAG,
+      SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+      unsigned Depth = 0) const;
 
   /// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
   /// Default implementation computes low bits based on alignment
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2114,7 +2114,7 @@
 bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
                                      unsigned Depth) const {
   KnownBits Known;
-  computeKnownBits(Op, Known, Depth);
+  computeKnownBits(Op, Known, nullptr, Depth);
   return Mask.isSubsetOf(Known.Zero);
 }
 
@@ -2157,21 +2157,25 @@
 /// Determine which bits of Op are known to be either zero or one and return
 /// them in Known. For vectors, the known bits are those that are shared by
 /// every vector element.
-void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
-                                    unsigned Depth) const {
+void SelectionDAG::computeKnownBits(
+    SDValue Op, KnownBits &Known,
+    SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads, unsigned Depth) const {
   EVT VT = Op.getValueType();
   APInt DemandedElts = VT.isVector()
                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
                            : APInt(1, 1);
-  computeKnownBits(Op, Known, DemandedElts, Depth);
+  computeKnownBits(Op, Known, DemandedElts, AnyToZeroExtLoads, Depth);
 }
 
 /// Determine which bits of Op are known to be either zero or one and return
 /// them in Known. The DemandedElts argument allows us to only collect the known
-/// bits that are shared by the requested vector elements.
-void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
-                                    const APInt &DemandedElts,
-                                    unsigned Depth) const {
+/// bits that are shared by the requested vector elements. AnyToZeroExtLoads
+/// optionally allows anyext loads to be treated as zeroext (top bits zero);
+/// any such load is returned in the SmallPtrSet and must be converted to a
+/// zeroext load for the returned KnownBits to be valid.
+void SelectionDAG::computeKnownBits(
+    SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+    SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads, unsigned Depth) const {
   unsigned BitWidth = Op.getScalarValueSizeInBits();
 
   Known = KnownBits(BitWidth); // Don't know anything.
@@ -2210,7 +2214,7 @@
         continue;
 
       SDValue SrcOp = Op.getOperand(i);
-      computeKnownBits(SrcOp, Known2, Depth + 1);
+      computeKnownBits(SrcOp, Known2, AnyToZeroExtLoads, Depth + 1);
 
       // BUILD_VECTOR can implicitly truncate sources, we must handle this.
       if (SrcOp.getValueSizeInBits() != BitWidth) {
@@ -2257,7 +2261,7 @@
     // Known bits are the values that are shared by every demanded element.
     if (!!DemandedLHS) {
       SDValue LHS = Op.getOperand(0);
-      computeKnownBits(LHS, Known2, DemandedLHS, Depth + 1);
+      computeKnownBits(LHS, Known2, DemandedLHS, AnyToZeroExtLoads, Depth + 1);
       Known.One &= Known2.One;
       Known.Zero &= Known2.Zero;
     }
@@ -2266,7 +2270,7 @@
       break;
     if (!!DemandedRHS) {
       SDValue RHS = Op.getOperand(1);
-      computeKnownBits(RHS, Known2, DemandedRHS, Depth + 1);
+      computeKnownBits(RHS, Known2, DemandedRHS, AnyToZeroExtLoads, Depth + 1);
       Known.One &= Known2.One;
       Known.Zero &= Known2.Zero;
     }
@@ -2283,7 +2287,8 @@
       DemandedSub = DemandedSub.trunc(NumSubVectorElts);
       if (!!DemandedSub) {
         SDValue Sub = Op.getOperand(i);
-        computeKnownBits(Sub, Known2, DemandedSub, Depth + 1);
+        computeKnownBits(Sub, Known2, DemandedSub, AnyToZeroExtLoads,
+                         Depth + 1);
         Known.One &= Known2.One;
         Known.Zero &= Known2.Zero;
       }
@@ -2306,22 +2311,24 @@
       uint64_t Idx = SubIdx->getZExtValue();
       APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
       if (!!DemandedSubElts) {
-        computeKnownBits(Sub, Known, DemandedSubElts, Depth + 1);
+        computeKnownBits(Sub, Known, DemandedSubElts, AnyToZeroExtLoads,
+                         Depth + 1);
         if (Known.isUnknown())
           break; // early-out.
       }
       APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
       APInt DemandedSrcElts = DemandedElts & ~SubMask;
       if (!!DemandedSrcElts) {
-        computeKnownBits(Src, Known2, DemandedSrcElts, Depth + 1);
+        computeKnownBits(Src, Known2, DemandedSrcElts, AnyToZeroExtLoads,
+                         Depth + 1);
         Known.One &= Known2.One;
         Known.Zero &= Known2.Zero;
       }
     } else {
-      computeKnownBits(Sub, Known, Depth + 1);
+      computeKnownBits(Sub, Known, AnyToZeroExtLoads, Depth + 1);
       if (Known.isUnknown())
         break; // early-out.
-      computeKnownBits(Src, Known2, Depth + 1);
+      computeKnownBits(Src, Known2, AnyToZeroExtLoads, Depth + 1);
       Known.One &= Known2.One;
       Known.Zero &= Known2.Zero;
     }
@@ -2337,9 +2344,9 @@
       // Offset the demanded elts by the subvector index.
       uint64_t Idx = SubIdx->getZExtValue();
       APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
-      computeKnownBits(Src, Known, DemandedSrc, Depth + 1);
+      computeKnownBits(Src, Known, DemandedSrc, AnyToZeroExtLoads, Depth + 1);
     } else {
-      computeKnownBits(Src, Known, Depth + 1);
+      computeKnownBits(Src, Known, AnyToZeroExtLoads, Depth + 1);
     }
     break;
   }
@@ -2354,7 +2361,7 @@
 
     // Fast handling of 'identity' bitcasts.
     if (BitWidth == SubBitWidth) {
-      computeKnownBits(N0, Known, DemandedElts, Depth + 1);
+      computeKnownBits(N0, Known, DemandedElts, AnyToZeroExtLoads, Depth + 1);
       break;
     }
@@ -2378,7 +2385,7 @@
           SubDemandedElts.setBit(i * SubScale);
 
       for (unsigned i = 0; i != SubScale; ++i) {
-        computeKnownBits(N0, Known2, SubDemandedElts.shl(i),
+        computeKnownBits(N0, Known2, SubDemandedElts.shl(i), AnyToZeroExtLoads,
                          Depth + 1);
         Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i);
         Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i);
@@ -2398,7 +2405,8 @@
         if (DemandedElts[i])
           SubDemandedElts.setBit(i / SubScale);
 
-      computeKnownBits(N0, Known2, SubDemandedElts, Depth + 1);
+      computeKnownBits(N0, Known2, SubDemandedElts, AnyToZeroExtLoads,
+                       Depth + 1);
 
       Known.Zero.setAllBits(); Known.One.setAllBits();
       for (unsigned i = 0; i != NumElts; ++i)
@@ -2415,8 +2423,10 @@
   }
   case ISD::AND:
     // If either the LHS or the RHS are Zero, the result is zero.
-    computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
-    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
 
     // Output known-1 bits are only known if set in both the LHS & RHS.
     Known.One &= Known2.One;
@@ -2424,8 +2434,10 @@
     Known.Zero |= Known2.Zero;
     break;
   case ISD::OR:
-    computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
-    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
 
     // Output known-0 bits are only known if clear in both the LHS & RHS.
     Known.Zero &= Known2.Zero;
@@ -2433,8 +2445,10 @@
     Known.One |= Known2.One;
     break;
   case ISD::XOR: {
-    computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
-    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
 
     // Output known-0 bits are known if clear or set in both the LHS & RHS.
     APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
@@ -2444,8 +2458,10 @@
     break;
   }
   case ISD::MUL: {
-    computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
-    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
 
     // If low bits are zero in either operand, output low known-0 bits.
     // Also compute a conservative estimate for high known-0 bits.
@@ -2466,10 +2482,12 @@
     // For the purposes of computing leading zeros we can conservatively
     // treat a udiv as a logical right shift by the power of 2 known to
    // be less than the denominator.
-    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     unsigned LeadZ = Known2.countMinLeadingZeros();
 
-    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
     if (RHSMaxLeadingZeros != BitWidth)
       LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
@@ -2479,22 +2497,26 @@
   }
   case ISD::SELECT:
  case ISD::VSELECT:
-    computeKnownBits(Op.getOperand(2), Known, DemandedElts, Depth+1);
+    computeKnownBits(Op.getOperand(2), Known, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     // If we don't know any bits, early out.
     if (Known.isUnknown())
       break;
-    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth+1);
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
 
     // Only known if known in both the LHS and RHS.
     Known.One &= Known2.One;
     Known.Zero &= Known2.Zero;
     break;
   case ISD::SELECT_CC:
-    computeKnownBits(Op.getOperand(3), Known, DemandedElts, Depth+1);
+    computeKnownBits(Op.getOperand(3), Known, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     // If we don't know any bits, early out.
     if (Known.isUnknown())
       break;
-    computeKnownBits(Op.getOperand(2), Known2, DemandedElts, Depth+1);
+    computeKnownBits(Op.getOperand(2), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
 
     // Only known if known in both the LHS and RHS.
     Known.One &= Known2.One;
@@ -2523,7 +2545,8 @@
     break;
   case ISD::SHL:
     if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
-      computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+      computeKnownBits(Op.getOperand(0), Known, DemandedElts,
+                       AnyToZeroExtLoads, Depth + 1);
       unsigned Shift = ShAmt->getZExtValue();
       Known.Zero <<= Shift;
       Known.One <<= Shift;
@@ -2533,7 +2556,8 @@
     break;
   case ISD::SRL:
     if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
-      computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+      computeKnownBits(Op.getOperand(0), Known, DemandedElts,
+                       AnyToZeroExtLoads, Depth + 1);
       unsigned Shift = ShAmt->getZExtValue();
       Known.Zero.lshrInPlace(Shift);
       Known.One.lshrInPlace(Shift);
@@ -2562,7 +2586,8 @@
     break;
   case ISD::SRA:
     if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
-      computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+      computeKnownBits(Op.getOperand(0), Known, DemandedElts,
+                       AnyToZeroExtLoads, Depth + 1);
       unsigned Shift = ShAmt->getZExtValue();
       // Sign extend known zero/one bit (else is unknown).
       Known.Zero.ashrInPlace(Shift);
@@ -2586,7 +2611,8 @@
     if (NewBits.getBoolValue())
       InputDemandedBits |= InSignMask;
 
-    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     Known.One &= InputDemandedBits;
     Known.Zero &= InputDemandedBits;
@@ -2606,7 +2632,8 @@
   }
   case ISD::CTTZ:
   case ISD::CTTZ_ZERO_UNDEF: {
-    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     // If we have a known 1, its position is our upper bound.
     unsigned PossibleTZ = Known2.countMaxTrailingZeros();
     unsigned LowBits = Log2_32(PossibleTZ) + 1;
@@ -2615,7 +2642,8 @@
   }
   case ISD::CTLZ:
   case ISD::CTLZ_ZERO_UNDEF: {
-    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     // If we have a known 1, its position is our upper bound.
     unsigned PossibleLZ = Known2.countMaxLeadingZeros();
     unsigned LowBits = Log2_32(PossibleLZ) + 1;
@@ -2623,7 +2651,8 @@
     break;
   }
   case ISD::CTPOP: {
-    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     // If we know some of the bits are zero, they can't be one.
     unsigned PossibleOnes = Known2.countMaxPopulation();
     Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
@@ -2636,6 +2665,14 @@
       EVT VT = LD->getMemoryVT();
       unsigned MemBits = VT.getScalarSizeInBits();
       Known.Zero.setBitsFrom(MemBits);
+      // If this is an EXTLoad and we are passed AnyToZeroExtLoads, treat
+      // the load as zero extended.
+    } else if (AnyToZeroExtLoads && ISD::isEXTLoad(Op.getNode()) &&
+               Op.getResNo() == 0) {
+      EVT VT = LD->getMemoryVT();
+      unsigned MemBits = VT.getScalarSizeInBits();
+      Known.Zero.setBitsFrom(MemBits);
+      AnyToZeroExtLoads->insert(LD);
     } else if (const MDNode *Ranges = LD->getRanges()) {
       if (LD->getExtensionType() == ISD::NON_EXTLOAD)
         computeKnownBitsFromRangeMetadata(*Ranges, Known);
@@ -2645,40 +2682,44 @@
   case ISD::ZERO_EXTEND_VECTOR_INREG: {
     EVT InVT = Op.getOperand(0).getValueType();
     APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
-    computeKnownBits(Op.getOperand(0), Known, InDemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known, InDemandedElts,
+                     AnyToZeroExtLoads, Depth + 1);
     Known = Known.zext(BitWidth);
     Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
     break;
   }
   case ISD::ZERO_EXTEND: {
     EVT InVT = Op.getOperand(0).getValueType();
-    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     Known = Known.zext(BitWidth);
     Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
     break;
   }
   // TODO ISD::SIGN_EXTEND_VECTOR_INREG
   case ISD::SIGN_EXTEND: {
-    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     // If the sign bit is known to be zero or one, then sext will extend
     // it to the top bits, else it will just zext.
     Known = Known.sext(BitWidth);
     break;
   }
   case ISD::ANY_EXTEND: {
-    computeKnownBits(Op.getOperand(0), Known, Depth+1);
+    computeKnownBits(Op.getOperand(0), Known, AnyToZeroExtLoads, Depth + 1);
     Known = Known.zext(BitWidth);
     break;
   }
   case ISD::TRUNCATE: {
-    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     Known = Known.trunc(BitWidth);
     break;
   }
   case ISD::AssertZext: {
     EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
     APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
-    computeKnownBits(Op.getOperand(0), Known, Depth+1);
+    computeKnownBits(Op.getOperand(0), Known, AnyToZeroExtLoads, Depth + 1);
     Known.Zero |= (~InMask);
     Known.One &= (~Known.Zero);
     break;
   }
@@ -2709,7 +2750,7 @@
     // NLZ can't be BitWidth with no sign bit
     APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
     computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
-                     Depth + 1);
+                     AnyToZeroExtLoads, Depth + 1);
 
     // If all of the MaskV bits are known to be zero, then we know the
     // output top bits are zero, because we now know that the output is
@@ -2725,12 +2766,14 @@
     // If low bits are known to be zero in both operands, then we know they are
     // going to be 0 in the result. Both addition and complement operations
     // preserve the low zero bits.
-    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     unsigned KnownZeroLow = Known2.countMinTrailingZeros();
     if (KnownZeroLow == 0)
       break;
 
-    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
     Known.Zero.setLowBits(KnownZeroLow);
     break;
@@ -2757,11 +2800,12 @@
     // known to be clear. For example, if one input has the top 10 bits clear
     // and the other has the top 8 bits clear, we know the top 7 bits of the
     // output must be clear.
-    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
     unsigned KnownZeroLow = Known2.countMinTrailingZeros();
 
-    computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, AnyToZeroExtLoads,
                      Depth + 1);
     KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
     KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
@@ -2786,7 +2830,8 @@
       const APInt &RA = Rem->getAPIntValue().abs();
       if (RA.isPowerOf2()) {
         APInt LowBits = RA - 1;
-        computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+        computeKnownBits(Op.getOperand(0), Known2, DemandedElts,
+                         AnyToZeroExtLoads, Depth + 1);
 
         // The low bits of the first operand are unchanged by the srem.
         Known.Zero = Known2.Zero & LowBits;
@@ -2810,7 +2855,8 @@
       const APInt &RA = Rem->getAPIntValue();
       if (RA.isPowerOf2()) {
         APInt LowBits = (RA - 1);
-        computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+        computeKnownBits(Op.getOperand(0), Known2, DemandedElts,
+                         AnyToZeroExtLoads, Depth + 1);
 
         // The upper bits are all zero, the lower ones are unchanged.
         Known.Zero = Known2.Zero | ~LowBits;
@@ -2821,8 +2867,10 @@
 
     // Since the result is less than or equal to either operand, any leading
     // zero bits in either operand must also exist in the result.
-    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
-    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
 
     uint32_t Leaders =
         std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
@@ -2831,7 +2879,7 @@
     break;
   }
   case ISD::EXTRACT_ELEMENT: {
-    computeKnownBits(Op.getOperand(0), Known, Depth+1);
+    computeKnownBits(Op.getOperand(0), Known, AnyToZeroExtLoads, Depth + 1);
     const unsigned Index = Op.getConstantOperandVal(1);
     const unsigned BitWidth = Op.getValueSizeInBits();
@@ -2859,10 +2907,10 @@
       // If we know the element index, just demand that vector element.
       unsigned Idx = ConstEltNo->getZExtValue();
       APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
-      computeKnownBits(InVec, Known, DemandedElt, Depth + 1);
+      computeKnownBits(InVec, Known, DemandedElt, AnyToZeroExtLoads, Depth + 1);
     } else {
       // Unknown element index, so ignore DemandedElts and demand them all.
-      computeKnownBits(InVec, Known, Depth + 1);
+      computeKnownBits(InVec, Known, AnyToZeroExtLoads, Depth + 1);
     }
     if (BitWidth > EltBitWidth)
       Known = Known.zext(BitWidth);
@@ -2882,7 +2930,7 @@
 
       // If we demand the inserted element then add its common known bits.
       if (DemandedElts[EltIdx]) {
-        computeKnownBits(InVal, Known2, Depth + 1);
+        computeKnownBits(InVal, Known2, AnyToZeroExtLoads, Depth + 1);
         Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
         Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
       }
@@ -2891,33 +2939,37 @@
       // that we don't demand the inserted element.
       APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
       if (!!VectorElts) {
-        computeKnownBits(InVec, Known2, VectorElts, Depth + 1);
+        computeKnownBits(InVec, Known2, VectorElts, AnyToZeroExtLoads,
+                         Depth + 1);
         Known.One &= Known2.One;
         Known.Zero &= Known2.Zero;
       }
     } else {
       // Unknown element index, so ignore DemandedElts and demand them all.
-      computeKnownBits(InVec, Known, Depth + 1);
-      computeKnownBits(InVal, Known2, Depth + 1);
+      computeKnownBits(InVec, Known, AnyToZeroExtLoads, Depth + 1);
+      computeKnownBits(InVal, Known2, AnyToZeroExtLoads, Depth + 1);
       Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
       Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
     }
     break;
   }
   case ISD::BITREVERSE: {
-    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     Known.Zero = Known2.Zero.reverseBits();
     Known.One = Known2.One.reverseBits();
     break;
   }
   case ISD::BSWAP: {
-    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
     Known.Zero = Known2.Zero.byteSwap();
     Known.One = Known2.One.byteSwap();
     break;
   }
   case ISD::ABS: {
-    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
 
     // If the source's MSB is zero then we know the rest of the bits already.
     if (Known2.isNonNegative()) {
@@ -2936,8 +2988,10 @@
     break;
   }
   case ISD::UMIN: {
-    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
-    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
 
     // UMIN - we know that the result will have the maximum of the
     // known zero leading bits of the inputs.
@@ -2950,9 +3004,10 @@
     break;
   }
   case ISD::UMAX: {
-    computeKnownBits(Op.getOperand(0), Known, DemandedElts,
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, AnyToZeroExtLoads,
+                     Depth + 1);
+    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, AnyToZeroExtLoads,
                      Depth + 1);
-    computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
 
     // UMAX - we know that the result will have the maximum of the
     // known one leading bits of the inputs.
@@ -2996,9 +3051,11 @@
   }
 
   // Fallback - just get the shared known bits of the operands.
-  computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+  computeKnownBits(Op.getOperand(0), Known, DemandedElts, AnyToZeroExtLoads,
+                   Depth + 1);
   if (Known.isUnknown()) break; // Early-out
-  computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+  computeKnownBits(Op.getOperand(1), Known2, DemandedElts, AnyToZeroExtLoads,
+                   Depth + 1);
   Known.Zero &= Known2.Zero;
   Known.One &= Known2.One;
   break;
@@ -3016,7 +3073,8 @@
   case ISD::INTRINSIC_W_CHAIN:
   case ISD::INTRINSIC_VOID:
     // Allow the target to implement this method for its nodes.
-    TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth);
+    TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this,
+                                       AnyToZeroExtLoads, Depth);
     break;
   }
 
@@ -3360,7 +3418,7 @@
   if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
     if (CRHS->isAllOnesValue()) {
       KnownBits Known;
-      computeKnownBits(Op.getOperand(0), Known, Depth+1);
+      computeKnownBits(Op.getOperand(0), Known, nullptr, Depth + 1);
 
       // If the input is known to be 0 or 1, the output is 0/-1, which is all
       // sign bits set.
@@ -3385,7 +3443,7 @@
   if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
     if (CLHS->isNullValue()) {
       KnownBits Known;
-      computeKnownBits(Op.getOperand(1), Known, Depth+1);
+      computeKnownBits(Op.getOperand(1), Known, nullptr, Depth + 1);
       // If the input is known to be 0 or 1, the output is 0/-1, which is all
       // sign bits set.
       if ((Known.Zero | 1).isAllOnesValue())
@@ -3551,7 +3609,7 @@
   // Finally, if we can prove that the top bits of the result are 0's or 1's,
   // use this information.
   KnownBits Known;
-  computeKnownBits(Op, Known, DemandedElts, Depth);
+  computeKnownBits(Op, Known, DemandedElts, nullptr, Depth);
   APInt Mask;
   if (Known.isNonNegative()) { // sign bit is 0
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -529,7 +529,7 @@
   if (Depth != 0) {
     // If not at the root, just compute the Known bits to
     // simplify things downstream.
-    TLO.DAG.computeKnownBits(Op, Known, Depth);
+    TLO.DAG.computeKnownBits(Op, Known, nullptr, Depth);
     return false;
   }
   // If this is the root being simplified, allow it to have multiple uses,
@@ -580,7 +580,7 @@
       SDValue Op0 = Op.getOperand(0);
       KnownBits LHSKnown;
       // Do not increment Depth here; that can cause an infinite loop.
-      TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth);
+      TLO.DAG.computeKnownBits(Op0, LHSKnown, nullptr, Depth);
       // If the LHS already has zeros where RHSC does, this 'and' is dead.
       if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
         return TLO.CombineTo(Op, Op0);
@@ -1204,7 +1204,7 @@
     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
     // recursive call where Known may be useful to the caller.
     if (Depth > 0) {
-      TLO.DAG.computeKnownBits(Op, Known, Depth);
+      TLO.DAG.computeKnownBits(Op, Known, nullptr, Depth);
       return false;
     }
     break;
@@ -1257,7 +1257,7 @@
   }
   default:
     // Just use computeKnownBits to compute output bits.
-    TLO.DAG.computeKnownBits(Op, Known, Depth);
+    TLO.DAG.computeKnownBits(Op, Known, nullptr, Depth);
     break;
   }
 
@@ -1569,11 +1569,10 @@
 /// Determine which of the bits specified in Mask are known to be either zero or
 /// one and return them in the Known.
-void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
-                                                   KnownBits &Known,
-                                                   const APInt &DemandedElts,
-                                                   const SelectionDAG &DAG,
-                                                   unsigned Depth) const {
+void TargetLowering::computeKnownBitsForTargetNode(
+    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+    const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+    unsigned Depth) const {
   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
@@ -2638,6 +2637,58 @@
       return N0;
   }
 
+  // Test if both operands are ANDs with the same mask and the non-mask
+  // bits are the same (often all zero). In this case we can drop the ANDs.
+  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
+      N0.getNode()->hasOneUse() && N1.getNode()->hasOneUse()) {
+    SDValue N0LHS = N0.getOperand(0);
+    SDValue N0RHS = N0.getOperand(1);
+    SDValue N1LHS = N1.getOperand(0);
+    SDValue N1RHS = N1.getOperand(1);
+
+    if (isa<ConstantSDNode>(N0LHS))
+      std::swap(N0LHS, N0RHS);
+    if (isa<ConstantSDNode>(N1LHS))
+      std::swap(N1LHS, N1RHS);
+
+    if (!isa<ConstantSDNode>(N0RHS) || !isa<ConstantSDNode>(N1RHS))
+      return SDValue();
+
+    APInt AndMask = cast<ConstantSDNode>(N0RHS)->getAPIntValue();
+    if (cast<ConstantSDNode>(N1RHS)->getAPIntValue() != AndMask)
+      return SDValue();
+
+    KnownBits KB0, KB1;
+    llvm::SmallPtrSet<LoadSDNode *, 4> AnyToZeroExtLoads;
+    DAG.computeKnownBits(N0LHS, KB0, &AnyToZeroExtLoads);
+    DAG.computeKnownBits(N1LHS, KB1, &AnyToZeroExtLoads);
+
+    // Check that we know something about all the bits.
+    if (!(KB0.Zero | KB0.One | AndMask).isAllOnesValue())
+      return SDValue();
+
+    // All non-mask bits must be the same from N0 and N1.
+    if ((KB0.Zero & ~AndMask) != (KB1.Zero & ~AndMask) ||
+        (KB0.One & ~AndMask) != (KB1.One & ~AndMask))
+      return SDValue();
+
+    // Transform anyext loads -> zeroext loads.
+    for (auto *LD : AnyToZeroExtLoads) {
+      SDValue Load = DAG.getExtLoad(
+          ISD::ZEXTLOAD, SDLoc(LD), LD->getValueType(0), LD->getChain(),
+          LD->getBasePtr(), LD->getPointerInfo(), LD->getMemoryVT(),
+          LD->getAlignment(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
+      DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), Load);
+      if (LD == N0LHS.getNode())
+        N0LHS = Load;
+      if (LD == N1LHS.getNode())
+        N1LHS = Load;
+    }
+
+    return DAG.getSetCC(dl, VT, N0LHS, N1LHS, Cond);
+  }
+
   // Could not fold it.
   return SDValue();
 }
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -250,10 +250,10 @@
   /// Determine which of the bits specified in Mask are known to be either zero
   /// or one and return them in the KnownZero/KnownOne bitsets.
-  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
-                                     const APInt &DemandedElts,
-                                     const SelectionDAG &DAG,
-                                     unsigned Depth = 0) const override;
+  void computeKnownBitsForTargetNode(
+      const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+      const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+      unsigned Depth = 0) const override;
 
   bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                     TargetLoweringOpt &TLO) const override;
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -965,15 +965,18 @@
 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
 /// Mask are known to be either zero or one and return them in Known.
 void AArch64TargetLowering::computeKnownBitsForTargetNode(
-    const SDValue Op, KnownBits &Known,
-    const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
+    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+    const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+    unsigned Depth) const {
   switch (Op.getOpcode()) {
   default:
     break;
   case AArch64ISD::CSEL: {
     KnownBits Known2;
-    DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
-    DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
+    DAG.computeKnownBits(Op->getOperand(0), Known, AnyToZeroExtLoads,
+                         Depth + 1);
+    DAG.computeKnownBits(Op->getOperand(1), Known2, AnyToZeroExtLoads,
+                         Depth + 1);
     Known.Zero &= Known2.Zero;
     Known.One &= Known2.One;
     break;
Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -227,11 +227,10 @@
   /// \brief Determine which of the bits specified in \p Mask are known to be
   /// either zero or one and return them in the \p KnownZero and \p KnownOne
   /// bitsets.
-  void computeKnownBitsForTargetNode(const SDValue Op,
-                                     KnownBits &Known,
-                                     const APInt &DemandedElts,
-                                     const SelectionDAG &DAG,
-                                     unsigned Depth = 0) const override;
+  void computeKnownBitsForTargetNode(
+      const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+      const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+      unsigned Depth = 0) const override;
 
   unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                            const APInt &DemandedElts,
                                            const SelectionDAG &DAG,
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4128,8 +4128,9 @@
 }
 
 void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
-    const SDValue Op, KnownBits &Known,
-    const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
+    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+    const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+    unsigned Depth) const {
 
   Known.resetAll(); // Don't know anything.
@@ -4168,8 +4169,10 @@
   case AMDGPUISD::MUL_U24:
   case AMDGPUISD::MUL_I24: {
     KnownBits LHSKnown, RHSKnown;
-    DAG.computeKnownBits(Op.getOperand(0), LHSKnown, Depth + 1);
-    DAG.computeKnownBits(Op.getOperand(1), RHSKnown, Depth + 1);
+    DAG.computeKnownBits(Op.getOperand(0), LHSKnown, AnyToZeroExtLoads,
+                         Depth + 1);
+    DAG.computeKnownBits(Op.getOperand(1), RHSKnown, AnyToZeroExtLoads,
+                         Depth + 1);
 
     unsigned TrailZ = LHSKnown.countMinTrailingZeros() +
                       RHSKnown.countMinTrailingZeros();
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -383,11 +383,11 @@
                                     SDValue &Offset, ISD::MemIndexedMode &AM,
                                     SelectionDAG &DAG) const override;
 
-    void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
-                                       const APInt &DemandedElts,
-                                       const SelectionDAG &DAG,
-                                       unsigned Depth) const override;
-
+    void computeKnownBitsForTargetNode(
+        const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+        const SelectionDAG &DAG,
+        SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+        unsigned Depth) const override;
 
     bool ExpandInlineAsm(CallInst *CI) const override;
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -13381,11 +13381,10 @@
   return true;
 }
 
-void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
-                                                      KnownBits &Known,
-                                                      const APInt &DemandedElts,
-                                                      const SelectionDAG &DAG,
-                                                      unsigned Depth) const {
+void ARMTargetLowering::computeKnownBitsForTargetNode(
+    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+    const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+    unsigned Depth) const {
   unsigned BitWidth = Known.getBitWidth();
   Known.resetAll();
   switch (Op.getOpcode()) {
@@ -13408,12 +13407,13 @@
     break;
   case ARMISD::CMOV: {
     // Bits are known zero/one if known on the LHS and RHS.
-    DAG.computeKnownBits(Op.getOperand(0), Known, Depth+1);
+    DAG.computeKnownBits(Op.getOperand(0), Known, AnyToZeroExtLoads, Depth + 1);
     if (Known.isUnknown())
       return;
 
     KnownBits KnownRHS;
-    DAG.computeKnownBits(Op.getOperand(1), KnownRHS, Depth+1);
+    DAG.computeKnownBits(Op.getOperand(1), KnownRHS, AnyToZeroExtLoads,
+                         Depth + 1);
     Known.Zero &= KnownRHS.Zero;
     Known.One &= KnownRHS.One;
     return;
@@ -13435,7 +13435,7 @@
   case ARMISD::BFI: {
     // Conservatively, we can recurse down the first operand
     // and just mask out all affected bits.
-    DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1);
+    DAG.computeKnownBits(Op.getOperand(0), Known, AnyToZeroExtLoads, Depth + 1);
 
     // The operand to BFI is already a mask suitable for removing the bits it
     // sets.
Index: lib/Target/Lanai/LanaiISelLowering.h
===================================================================
--- lib/Target/Lanai/LanaiISelLowering.h
+++ lib/Target/Lanai/LanaiISelLowering.h
@@ -106,10 +106,10 @@
 
   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
-  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
-                                     const APInt &DemandedElts,
-                                     const SelectionDAG &DAG,
-                                     unsigned Depth = 0) const override;
+  void computeKnownBitsForTargetNode(
+      const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+      const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+      unsigned Depth = 0) const override;
 
 private:
   SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
Index: lib/Target/Lanai/LanaiISelLowering.cpp
===================================================================
--- lib/Target/Lanai/LanaiISelLowering.cpp
+++ lib/Target/Lanai/LanaiISelLowering.cpp
@@ -1503,7 +1503,8 @@
 
 void LanaiTargetLowering::computeKnownBitsForTargetNode(
     const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
-    const SelectionDAG &DAG, unsigned Depth) const {
+    const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+    unsigned Depth) const {
   unsigned BitWidth = Known.getBitWidth();
   switch (Op.getOpcode()) {
   default:
@@ -1514,8 +1515,10 @@
     break;
   case LanaiISD::SELECT_CC:
     KnownBits Known2;
-    DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
-    DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
+    DAG.computeKnownBits(Op->getOperand(0), Known, AnyToZeroExtLoads,
+                         Depth + 1);
+    DAG.computeKnownBits(Op->getOperand(1), Known2, AnyToZeroExtLoads,
+                         Depth + 1);
     Known.Zero &= Known2.Zero;
     Known.One &= Known2.One;
     break;
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -659,11 +659,11 @@
     unsigned getRegisterByName(const char* RegName, EVT VT,
                                SelectionDAG &DAG) const override;
 
-    void computeKnownBitsForTargetNode(const SDValue Op,
-                                       KnownBits &Known,
-                                       const APInt &DemandedElts,
-                                       const SelectionDAG &DAG,
-                                       unsigned Depth = 0) const override;
+    void computeKnownBitsForTargetNode(
+        const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+        const SelectionDAG &DAG,
+        SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+        unsigned Depth = 0) const override;
 
     unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12934,11 +12934,10 @@
 // Inline Assembly Support
 //===----------------------------------------------------------------------===//
 
-void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
-                                                      KnownBits &Known,
-                                                      const APInt &DemandedElts,
-                                                      const SelectionDAG &DAG,
-                                                      unsigned Depth) const {
+void PPCTargetLowering::computeKnownBitsForTargetNode(
+    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+    const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+    unsigned Depth) const {
   Known.resetAll();
   switch (Op.getOpcode()) {
   default: break;
Index: lib/Target/Sparc/SparcISelLowering.h
===================================================================
--- lib/Target/Sparc/SparcISelLowering.h
+++ lib/Target/Sparc/SparcISelLowering.h
@@ -65,11 +65,11 @@
   /// computeKnownBitsForTargetNode - Determine which of the bits specified
   /// in Mask are known to be either zero or one and return them in the
   /// KnownZero/KnownOne bitsets.
-  void computeKnownBitsForTargetNode(const SDValue Op,
-                                     KnownBits &Known,
-                                     const APInt &DemandedElts,
-                                     const SelectionDAG &DAG,
-                                     unsigned Depth = 0) const override;
+  void computeKnownBitsForTargetNode(
+      const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+      const SelectionDAG &DAG,
+      SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+      unsigned Depth = 0) const override;
 
   MachineBasicBlock *
   EmitInstrWithCustomInserter(MachineInstr &MI,
Index: lib/Target/Sparc/SparcISelLowering.cpp
===================================================================
--- lib/Target/Sparc/SparcISelLowering.cpp
+++ lib/Target/Sparc/SparcISelLowering.cpp
@@ -1883,12 +1883,10 @@
 /// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
 /// be zero. Op is expected to be a target specific node. Used by DAG
 /// combiner.
-void SparcTargetLowering::computeKnownBitsForTargetNode
-                                (const SDValue Op,
-                                 KnownBits &Known,
-                                 const APInt &DemandedElts,
-                                 const SelectionDAG &DAG,
-                                 unsigned Depth) const {
+void SparcTargetLowering::computeKnownBitsForTargetNode(
+    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+    const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+    unsigned Depth) const {
   KnownBits Known2;
   Known.resetAll();
 
@@ -1897,8 +1895,9 @@
   case SPISD::SELECT_ICC:
   case SPISD::SELECT_XCC:
   case SPISD::SELECT_FCC:
-    DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1);
-    DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1);
+    DAG.computeKnownBits(Op.getOperand(1), Known, AnyToZeroExtLoads, Depth + 1);
+    DAG.computeKnownBits(Op.getOperand(0), Known2, AnyToZeroExtLoads,
+                         Depth + 1);
 
     // Only known if known in both the LHS and RHS.
     Known.One &= Known2.One;
Index: lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.h
+++ lib/Target/SystemZ/SystemZISelLowering.h
@@ -492,11 +492,10 @@
   /// Determine which of the bits specified in Mask are known to be either
   /// zero or one and return them in the KnownZero/KnownOne bitsets.
-  void computeKnownBitsForTargetNode(const SDValue Op,
-                                     KnownBits &Known,
-                                     const APInt &DemandedElts,
-                                     const SelectionDAG &DAG,
-                                     unsigned Depth = 0) const override;
+  void computeKnownBitsForTargetNode(
+      const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+      const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+      unsigned Depth = 0) const override;
 
   ISD::NodeType getExtendForAtomicOps() const override {
     return ISD::ANY_EXTEND;
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -5576,20 +5576,20 @@
   return SDValue();
 }
 
-void
-SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
-                                                     KnownBits &Known,
-                                                     const APInt &DemandedElts,
-                                                     const SelectionDAG &DAG,
-                                                     unsigned Depth) const {
+void SystemZTargetLowering::computeKnownBitsForTargetNode(
+    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+    const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+    unsigned Depth) const {
   unsigned BitWidth = Known.getBitWidth();
 
   Known.resetAll();
   switch (Op.getOpcode()) {
   case SystemZISD::SELECT_CCMASK: {
     KnownBits TrueKnown(BitWidth), FalseKnown(BitWidth);
-    DAG.computeKnownBits(Op.getOperand(0), TrueKnown, Depth + 1);
-    DAG.computeKnownBits(Op.getOperand(1), FalseKnown, Depth + 1);
+    DAG.computeKnownBits(Op.getOperand(0), TrueKnown, AnyToZeroExtLoads,
+                         Depth + 1);
+    DAG.computeKnownBits(Op.getOperand(1), FalseKnown, AnyToZeroExtLoads,
+                         Depth + 1);
     Known.Zero = TrueKnown.Zero & FalseKnown.Zero;
     Known.One = TrueKnown.One & FalseKnown.One;
     break;
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -840,11 +840,11 @@
     /// Determine which of the bits specified in Mask are known to be either
     /// zero or one and return them in the KnownZero/KnownOne bitsets.
-    void computeKnownBitsForTargetNode(const SDValue Op,
-                                       KnownBits &Known,
-                                       const APInt &DemandedElts,
-                                       const SelectionDAG &DAG,
-                                       unsigned Depth = 0) const override;
+    void computeKnownBitsForTargetNode(
+        const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+        const SelectionDAG &DAG,
+        SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+        unsigned Depth = 0) const override;
 
     /// Determine the number of bits in the operation that are sign bits.
     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -28137,11 +28137,10 @@
   return TLO.CombineTo(Op, NewOp);
 }
 
-void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
-                                                      KnownBits &Known,
-                                                      const APInt &DemandedElts,
-                                                      const SelectionDAG &DAG,
-                                                      unsigned Depth) const {
+void X86TargetLowering::computeKnownBitsForTargetNode(
+    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+    const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+    unsigned Depth) const {
   unsigned BitWidth = Known.getBitWidth();
   unsigned Opc = Op.getOpcode();
   EVT VT = Op.getValueType();
@@ -28169,7 +28168,7 @@
     EVT SrcVT = Src.getValueType();
     APInt DemandedElt = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
                                             Op.getConstantOperandVal(1));
-    DAG.computeKnownBits(Src, Known, DemandedElt, Depth + 1);
+    DAG.computeKnownBits(Src, Known, DemandedElt, AnyToZeroExtLoads, Depth + 1);
     Known = Known.zextOrTrunc(BitWidth);
     Known.Zero.setBitsFrom(SrcVT.getScalarSizeInBits());
     break;
@@ -28182,7 +28181,8 @@
       break;
     }
 
-    DAG.computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+    DAG.computeKnownBits(Op.getOperand(0), Known, DemandedElts,
+                         AnyToZeroExtLoads, Depth + 1);
     unsigned ShAmt = ShiftImm->getZExtValue();
     if (Opc == X86ISD::VSHLI) {
       Known.Zero <<= ShAmt;
@@ -28210,18 +28210,20 @@
     Known = KnownBits(InBitWidth);
     APInt DemandedSrcElts = APInt::getLowBitsSet(InNumElts, NumElts);
-    DAG.computeKnownBits(N0, Known, DemandedSrcElts, Depth + 1);
+    DAG.computeKnownBits(N0, Known, DemandedSrcElts, AnyToZeroExtLoads,
+                         Depth + 1);
     Known = Known.zext(BitWidth);
     Known.Zero.setBitsFrom(InBitWidth);
     break;
   }
   case X86ISD::CMOV: {
-    DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1);
+    DAG.computeKnownBits(Op.getOperand(1), Known, AnyToZeroExtLoads, Depth + 1);
     // If we don't know any bits, early out.
     if (Known.isUnknown())
       break;
     KnownBits Known2;
-    DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1);
+    DAG.computeKnownBits(Op.getOperand(0), Known2, AnyToZeroExtLoads,
+                         Depth + 1);
 
     // Only known if known in both the LHS and RHS.
     Known.One &= Known2.One;
Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -1324,9 +1324,9 @@
     return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
 
   KnownBits Known0;
-  CurDAG->computeKnownBits(N->getOperand(0), Known0, 0);
+  CurDAG->computeKnownBits(N->getOperand(0), Known0, nullptr, 0);
   KnownBits Known1;
-  CurDAG->computeKnownBits(N->getOperand(1), Known1, 0);
+  CurDAG->computeKnownBits(N->getOperand(1), Known1, nullptr, 0);
   return (~Known0.Zero & ~Known1.Zero) == 0;
 }]>;
Index: lib/Target/XCore/XCoreISelLowering.h
===================================================================
--- lib/Target/XCore/XCoreISelLowering.h
+++ lib/Target/XCore/XCoreISelLowering.h
@@ -200,11 +200,11 @@
 
     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
-    void computeKnownBitsForTargetNode(const SDValue Op,
-                                       KnownBits &Known,
-                                       const APInt &DemandedElts,
-                                       const SelectionDAG &DAG,
-                                       unsigned Depth = 0) const override;
+    void computeKnownBitsForTargetNode(
+        const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+        const SelectionDAG &DAG,
+        SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+        unsigned Depth = 0) const override;
 
     SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                                  bool isVarArg,
Index: lib/Target/XCore/XCoreISelLowering.cpp
===================================================================
--- lib/Target/XCore/XCoreISelLowering.cpp
+++ lib/Target/XCore/XCoreISelLowering.cpp
@@ -1819,11 +1819,10 @@
   return SDValue();
 }
 
-void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
-                                                        KnownBits &Known,
-                                                        const APInt &DemandedElts,
-                                                        const SelectionDAG &DAG,
-                                                        unsigned Depth) const {
+void XCoreTargetLowering::computeKnownBitsForTargetNode(
+    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+    const SelectionDAG &DAG, SmallPtrSetImpl<LoadSDNode *> *AnyToZeroExtLoads,
+    unsigned Depth) const {
   Known.resetAll();
   switch (Op.getOpcode()) {
   default: break;
Index: test/CodeGen/Thumb/setcc_xor.ll
===================================================================
--- test/CodeGen/Thumb/setcc_xor.ll
+++ test/CodeGen/Thumb/setcc_xor.ll
@@ -4,27 +4,20 @@
 define i8 @test1(i8 zeroext %x, i8 zeroext %y) {
 ; CHECK-V6M-LABEL: test1:
-; CHECK-V6M: movs r2, #255
-; CHECK-V6M-NEXT: mov r3, r2
-; CHECK-V6M-NEXT: bics r3, r1
-; CHECK-V6M-NEXT: bics r2, r0
-; CHECK-V6M-NEXT: mvns r0, r0
+; CHECK-V6M: mvns r0, r0
 ; CHECK-V6M-NEXT: mvns r1, r1
-; CHECK-V6M-NEXT: cmp r2, r3
+; CHECK-V6M-NEXT: cmp r0, r1
 ; CHECK-V6M-NEXT: bls .LBB0_2
 ; CHECK-V6M-NEXT: mov r0, r1
 ; CHECK-V6M-NEXT: .LBB0_2:
 ; CHECK-V6M-NEXT: bx lr
 ;
 ; CHECK-V7M-LABEL: test1:
-; CHECK-V7M: mvns r1, r1
-; CHECK-V7M-NEXT: mvns r0, r0
-; CHECK-V7M-NEXT: uxtb r2, r1
-; CHECK-V7M-NEXT: uxtb r3, r0
-; CHECK-V7M-NEXT: cmp r3, r2
+; CHECK-V7M: mvns r2, r0
+; CHECK-V7M-NEXT: mvns r0, r1
+; CHECK-V7M-NEXT: cmp r2, r0
 ; CHECK-V7M-NEXT: it ls
-; CHECK-V7M-NEXT: movls r1, r0
-; CHECK-V7M-NEXT: mov r0, r1
+; CHECK-V7M-NEXT: movls r0, r2
 ; CHECK-V7M-NEXT: bx lr
 entry:
   %nx = xor i8 %x, 255
@@ -36,43 +29,37 @@
 
 define void @test2(i8* %X, i8* %Y) {
 ; CHECK-V6M-LABEL: test2:
-; CHECK-V6M: .save {r4, r5, r7, lr}
-; CHECK-V6M-NEXT: push {r4, r5, r7, lr}
-; CHECK-V6M-NEXT: ldrb r2, [r1]
-; CHECK-V6M-NEXT: movs r4, #255
-; CHECK-V6M-NEXT: mov r5, r4
-; CHECK-V6M-NEXT: bics r5, r2
-; CHECK-V6M-NEXT: ldrb r3, [r0]
-; CHECK-V6M-NEXT: bics r4, r3
-; CHECK-V6M-NEXT: mvns r3, r3
+; CHECK-V6M: .save {r4, lr}
+; CHECK-V6M-NEXT: push {r4, lr}
+; CHECK-V6M-NEXT: ldrb r2, [r0]
 ; CHECK-V6M-NEXT: mvns r2, r2
+; CHECK-V6M-NEXT: ldrb r3, [r1]
+; CHECK-V6M-NEXT: mvns r3, r3
+; CHECK-V6M-NEXT: cmp r2, r3
 ; CHECK-V6M-NEXT: mov r4, r2
+; CHECK-V6M-NEXT: blo .LBB1_2
+; CHECK-V6M-NEXT: mov r4, r3
 ; CHECK-V6M-NEXT: .LBB1_2:
-; CHECK-V6M-NEXT: subs r3, r3, r4
-; CHECK-V6M-NEXT: strb r3, [r0]
-; CHECK-V6M-NEXT: subs r0, r2, r4
+; CHECK-V6M-NEXT: subs r2, r2, r4
+; CHECK-V6M-NEXT: strb r2, [r0]
+; CHECK-V6M-NEXT: subs r0, r3, r4
 ; CHECK-V6M-NEXT: strb r0, [r1]
-; CHECK-V6M-NEXT: pop {r4, r5, r7, pc}
+; CHECK-V6M-NEXT: pop {r4, pc}
 ;
 ; CHECK-V7M-LABEL: test2:
 ; CHECK-V7M: .save {r7, lr}
 ; CHECK-V7M-NEXT: push {r7, lr}
-; CHECK-V7M-NEXT: ldrb r3, [r0]
-; CHECK-V7M-NEXT: ldrb r2, [r1]
-; CHECK-V7M-NEXT: mvns r3, r3
-; CHECK-V7M-NEXT: mvn.w lr, r2
-; CHECK-V7M-NEXT: uxtb r2, r3
-; CHECK-V7M-NEXT: uxtb.w r12, lr
-; CHECK-V7M-NEXT: cmp r2, r12
-; CHECK-V7M-NEXT: mov r2, lr
+; CHECK-V7M-NEXT: ldrb r3, [r1]
+; CHECK-V7M-NEXT: ldrb r2, [r0]
+; CHECK-V7M-NEXT: mvn.w lr, r3
+; CHECK-V7M-NEXT: mvn.w r12, r2
+; CHECK-V7M-NEXT: cmp r12, lr
+; CHECK-V7M-NEXT: mov r3, lr
 ; CHECK-V7M-NEXT: it lo
-; CHECK-V7M-NEXT: movlo r2, r3
-; CHECK-V7M-NEXT: subs r3, r3, r2
-; CHECK-V7M-NEXT: strb r3, [r0]
-; CHECK-V7M-NEXT: sub.w r0, lr, r2
+; CHECK-V7M-NEXT: movlo r3, r12
+; CHECK-V7M-NEXT: sub.w r2, r12, r3
+; CHECK-V7M-NEXT: strb r2, [r0]
+; CHECK-V7M-NEXT: sub.w r0, lr, r3
 ; CHECK-V7M-NEXT: strb r0, [r1]
 ; CHECK-V7M-NEXT: pop {r7, pc}
 entry:
@@ -92,42 +79,31 @@
 
 define void @testloop(i32 %I, i8* nocapture readonly %A, i8* nocapture %B) {
 ; CHECK-V6M-LABEL: testloop:
-; CHECK-V6M: .save {r4, r5, r6, r7, lr}
-; CHECK-V6M-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-V6M-NEXT: .pad #4
-; CHECK-V6M-NEXT: sub sp, #4
+; CHECK-V6M: .save {r4, r5, r6, lr}
+; CHECK-V6M-NEXT: push {r4, r5, r6, lr}
 ; CHECK-V6M-NEXT: cmp r0, #1
 ; CHECK-V6M-NEXT: blt .LBB2_6
 ; CHECK-V6M-NEXT: .LBB2_1:
+; CHECK-V6M-NEXT: ldrb r3, [r1]
+; CHECK-V6M-NEXT: mvns r4, r3
 ; CHECK-V6M-NEXT: ldrb r3, [r1, #2]
-; CHECK-V6M-NEXT: movs r6, #255
-; CHECK-V6M-NEXT: mov r5, r6
-; CHECK-V6M-NEXT: bics r5, r3
-; CHECK-V6M-NEXT: ldrb r4, [r1]
-; CHECK-V6M-NEXT: mov r7, r6
-; CHECK-V6M-NEXT: bics r7, r4
-; CHECK-V6M-NEXT: mvns r4, r4
 ; CHECK-V6M-NEXT: mvns r3, r3
-; CHECK-V6M-NEXT: cmp r7, r5
+; CHECK-V6M-NEXT: cmp r4, r3
 ; CHECK-V6M-NEXT: mov r5, r4
 ; CHECK-V6M-NEXT: blo .LBB2_3
 ; CHECK-V6M-NEXT: mov r5, r3
 ; CHECK-V6M-NEXT: .LBB2_3:
-; CHECK-V6M-NEXT: str r3, [sp]
-; CHECK-V6M-NEXT: uxtb r3, r5
-; CHECK-V6M-NEXT: ldrb r7, [r1, #1]
-; CHECK-V6M-NEXT: bics r6, r7
-; CHECK-V6M-NEXT: mvns r7, r7
-; CHECK-V6M-NEXT: cmp r3, r6
+; CHECK-V6M-NEXT: ldrb r6, [r1, #1]
+; CHECK-V6M-NEXT: mvns r6, r6
+; CHECK-V6M-NEXT: cmp r5, r6
 ; CHECK-V6M-NEXT: blo .LBB2_5
-; CHECK-V6M-NEXT: mov r5, r7
+; CHECK-V6M-NEXT: mov r5, r6
 ; CHECK-V6M-NEXT: .LBB2_5:
 ; CHECK-V6M-NEXT: strb r5, [r2]
-; CHECK-V6M-NEXT: subs r3, r4, r5
-; CHECK-V6M-NEXT: strb r3, [r2, #1]
-; CHECK-V6M-NEXT: subs r3, r7, r5
-; CHECK-V6M-NEXT: strb r3, [r2, #2]
-; CHECK-V6M-NEXT: ldr r3, [sp]
+; CHECK-V6M-NEXT: subs r4, r4, r5
+; CHECK-V6M-NEXT: strb r4, [r2, #1]
+; CHECK-V6M-NEXT: subs r4, r6, r5
+; CHECK-V6M-NEXT: strb r4, [r2, #2]
 ; CHECK-V6M-NEXT: subs r3, r3, r5
 ; CHECK-V6M-NEXT: strb r3, [r2, #3]
 ; CHECK-V6M-NEXT: adds r2, r2, #4
@@ -135,45 +111,40 @@
 ; CHECK-V6M-NEXT: subs r0, r0, #1
 ; CHECK-V6M-NEXT: bne .LBB2_1
 ; CHECK-V6M-NEXT: .LBB2_6:
-; CHECK-V6M-NEXT: add sp, #4
-; CHECK-V6M-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-V6M-NEXT: pop {r4, r5, r6, pc}
 ;
 ; CHECK-V7M-LABEL: testloop:
-; CHECK-V7M: .save {r4, r5, r6, r7, lr}
-; CHECK-V7M-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-V7M: .save {r4, r5, r7, lr}
+; CHECK-V7M-NEXT: push {r4, r5, r7, lr}
 ; CHECK-V7M-NEXT: cmp r0, #1
 ; CHECK-V7M-NEXT: blt .LBB2_2
 ; CHECK-V7M-NEXT: .LBB2_1:
 ; CHECK-V7M-NEXT: ldrb.w lr, [r1]
-; CHECK-V7M-NEXT: ldrb r3, [r1, #2]
+; CHECK-V7M-NEXT: ldrb r4, [r1, #2]
 ; CHECK-V7M-NEXT: ldrb.w r12, [r1, #1]
 ; CHECK-V7M-NEXT: adds r1, #3
-; CHECK-V7M-NEXT: mvn.w r4, lr
-; CHECK-V7M-NEXT: mvns r7, r3
-; CHECK-V7M-NEXT: uxtb r5, r4
-; CHECK-V7M-NEXT: uxtb r6, r7
-; CHECK-V7M-NEXT: cmp r5, r6
-; CHECK-V7M-NEXT: mov r3, r7
-; CHECK-V7M-NEXT: mvn.w r5, r12
+; CHECK-V7M-NEXT: mvn.w r5, lr
+; CHECK-V7M-NEXT: mvn.w lr, r4
+; CHECK-V7M-NEXT: cmp r5, lr
+; CHECK-V7M-NEXT: mov r4, lr
+; CHECK-V7M-NEXT: mvn.w r3, r12
 ; CHECK-V7M-NEXT: it lo
-; CHECK-V7M-NEXT: movlo r3, r4
-; CHECK-V7M-NEXT: uxtb r6, r5
-; CHECK-V7M-NEXT: uxtb.w lr, r3
-; CHECK-V7M-NEXT: cmp lr, r6
+; CHECK-V7M-NEXT: movlo r4, r5
+; CHECK-V7M-NEXT: cmp r4, r3
 ; CHECK-V7M-NEXT: it hs
-; CHECK-V7M-NEXT: movhs r3, r5
+; CHECK-V7M-NEXT: movhs r4, r3
 ; CHECK-V7M-NEXT: subs r0, #1
-; CHECK-V7M-NEXT: sub.w r6, r4, r3
-; CHECK-V7M-NEXT: strb r3, [r2]
-; CHECK-V7M-NEXT: strb r6, [r2, #1]
-; CHECK-V7M-NEXT: sub.w r6, r5, r3
-; CHECK-V7M-NEXT: strb r6, [r2, #2]
-; CHECK-V7M-NEXT: sub.w r3, r7, r3
+; CHECK-V7M-NEXT: sub.w r3, r3, r4
+; CHECK-V7M-NEXT: strb r4, [r2]
+; CHECK-V7M-NEXT: sub.w r5, r5, r4
+; CHECK-V7M-NEXT: strb r5, [r2, #1]
+; CHECK-V7M-NEXT: strb r3, [r2, #2]
+; CHECK-V7M-NEXT: sub.w r3, lr, r4
 ; CHECK-V7M-NEXT: strb r3, [r2, #3]
 ; CHECK-V7M-NEXT: add.w r2, r2, #4
 ; CHECK-V7M-NEXT: bne .LBB2_1
 ; CHECK-V7M-NEXT: .LBB2_2:
-; CHECK-V7M-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-V7M-NEXT: pop {r4, r5, r7, pc}
 entry:
   %cmp74 = icmp sgt i32 %I, 0
   br i1 %cmp74, label %for.body.preheader, label %for.cond.cleanup
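
Note for reviewers (not part of the patch): the contract of the new AnyToZeroExtLoads
parameter, restated as a minimal, hypothetical caller. The helper name
knownBitsWithSpeculatedZExt is illustrative only; the computeKnownBits /
getExtLoad / ReplaceAllUsesOfValueWith sequence mirrors the SimplifySetCC
change in lib/CodeGen/SelectionDAG/TargetLowering.cpp above.

// Sketch of the intended calling convention. Assumes the usual LLVM
// includes (llvm/ADT/SmallPtrSet.h, llvm/CodeGen/SelectionDAG.h).
static void knownBitsWithSpeculatedZExt(SelectionDAG &DAG, SDValue A,
                                        SDValue B, KnownBits &KA,
                                        KnownBits &KB) {
  // Passing a set tells computeKnownBits it may treat anyext loads as if
  // they were zeroext; every load it speculates on is recorded here.
  llvm::SmallPtrSet<LoadSDNode *, 4> SpeculatedLoads;
  DAG.computeKnownBits(A, KA, &SpeculatedLoads);
  DAG.computeKnownBits(B, KB, &SpeculatedLoads);

  // The returned KnownBits are only valid once each recorded anyext load
  // has actually been rewritten as a zeroext load, so do that before
  // consuming KA/KB.
  for (LoadSDNode *LD : SpeculatedLoads) {
    SDValue ZExtLoad = DAG.getExtLoad(
        ISD::ZEXTLOAD, SDLoc(LD), LD->getValueType(0), LD->getChain(),
        LD->getBasePtr(), LD->getPointerInfo(), LD->getMemoryVT(),
        LD->getAlignment(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), ZExtLoad);
  }
}

A caller that bails out after calling computeKnownBits but before converting
the loads is safe only if it discards the KnownBits it computed; using them
without the conversion would be unsound. This is why the SimplifySetCC fold
above performs all of its early returns first and rewrites the loads as its
final step.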