Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2688,6 +2688,10 @@ // c) x & (-1 >> (32 - y)) // d) x << (32 - y) >> (32 - y) bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { + assert( + (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) && + "Should be either an and-mask, or right-shift after clearing high bits."); + // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one. if (!Subtarget->hasBMI() && !Subtarget->hasBMI2()) return false; @@ -2698,13 +2702,16 @@ if (NVT != MVT::i32 && NVT != MVT::i64) return false; + unsigned Size = NVT.getSizeInBits(); + SDValue NBits; // If we have BMI2's BZHI, we are ok with muti-use patterns. // Else, if we only have BMI1's BEXTR, we require one-use. const bool CanHaveExtraUses = Subtarget->hasBMI2(); - auto checkOneUse = [CanHaveExtraUses](SDValue Op) { - return CanHaveExtraUses || Op.hasOneUse(); + auto checkOneUse = [CanHaveExtraUses](SDValue Op, unsigned NUses = 1) { + return CanHaveExtraUses || + Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo()); }; // a) x & ((1 << nbits) + (-1)) @@ -2740,33 +2747,76 @@ return true; }; + SDValue X; + + // d) x << (32 - y) >> (32 - y) + auto matchPatternD = [&checkOneUse, Size, &X, &NBits](SDNode *Node) -> bool { + if (Node->getOpcode() != ISD::SRL) + return false; + SDValue N0 = Node->getOperand(0); + if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0)) + return false; + SDValue N1 = Node->getOperand(1); + SDValue N01 = N0->getOperand(1); + // Both of the shifts must be by the exact same value. + // There should not be any uses of the shift amount outside of the pattern. + if (N1 != N01 || !checkOneUse(N1, 2)) + return false; + // Skip over a truncate of the shift amount. 
+ if (N1->getOpcode() == ISD::TRUNCATE) { + N1 = N1->getOperand(0); + // The trunc should have been the only user of the real shift amount. + if (!checkOneUse(N1)) + return false; + } + // Match the shift amount as: (bitwidth - y). It should go away, too. + if (N1.getOpcode() != ISD::SUB) + return false; + auto N10 = dyn_cast<ConstantSDNode>(N1.getOperand(0)); + if (!N10 || N10->getZExtValue() != Size) + return false; + X = N0->getOperand(0); + NBits = N1.getOperand(1); + return true; + }; + auto matchLowBitMask = [&matchPatternA, &matchPatternB](SDValue Mask) -> bool { - // FIXME: patterns c, d. + // FIXME: pattern c. return matchPatternA(Mask) || matchPatternB(Mask); }; - SDValue X = Node->getOperand(0); - SDValue Mask = Node->getOperand(1); + if (Node->getOpcode() == ISD::AND) { + X = Node->getOperand(0); + SDValue Mask = Node->getOperand(1); - if (matchLowBitMask(Mask)) { - // Great. - } else { - std::swap(X, Mask); - if (!matchLowBitMask(Mask)) - return false; - } + if (matchLowBitMask(Mask)) { + // Great. + } else { + std::swap(X, Mask); + if (!matchLowBitMask(Mask)) + return false; + } + } else if (!matchPatternD(Node)) + return false; SDLoc DL(Node); - // Insert 8-bit NBits into lowest 8 bits of NVT-sized (32 or 64-bit) register. - // All the other bits are undefined, we do not care about them. - SDValue ImplDef = - SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, NVT), 0); - insertDAGNode(*CurDAG, NBits, ImplDef); SDValue OrigNBits = NBits; - NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, NVT, ImplDef, NBits); - insertDAGNode(*CurDAG, OrigNBits, NBits); + if (NBits.getValueType() != NVT) { + // Truncate the shift amount. + NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits); + insertDAGNode(*CurDAG, OrigNBits, NBits); + + // Insert 8-bit NBits into lowest 8 bits of NVT-sized (32 or 64-bit) + // register. All the other bits are undefined, we do not care about them. 
+ SDValue ImplDef = + SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, NVT), 0); + insertDAGNode(*CurDAG, OrigNBits, ImplDef); + NBits = + CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, NVT, ImplDef, NBits); + insertDAGNode(*CurDAG, OrigNBits, NBits); + } if (Subtarget->hasBMI2()) { // Great, just emit the the BZHI.. @@ -2963,17 +3013,8 @@ if (ShiftAmt->getOpcode() == ISD::TRUNCATE) ShiftAmt = ShiftAmt->getOperand(0); - // Special case to avoid messing up a BZHI pattern. - // Look for (srl (shl X, (size - y)), (size - y) - if (Subtarget->hasBMI2() && (VT == MVT::i32 || VT == MVT::i64) && - N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL && - // Shift amounts the same? - N->getOperand(1) == N->getOperand(0).getOperand(1) && - // Shift amounts size - y? - ShiftAmt.getOpcode() == ISD::SUB && - isa<ConstantSDNode>(ShiftAmt.getOperand(0)) && - cast<ConstantSDNode>(ShiftAmt.getOperand(0))->getZExtValue() == Size) - return false; + // This function is called after X86DAGToDAGISel::matchBitExtract(), + // so we are not afraid that we might mess up BZHI/BEXTR pattern. 
SDValue NewShiftAmt; if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { @@ -3172,6 +3213,9 @@ } case ISD::SRL: + if (matchBitExtract(Node)) + return; + LLVM_FALLTHROUGH; case ISD::SRA: case ISD::SHL: if (tryShiftAmountMod(Node)) Index: llvm/trunk/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td @@ -2519,14 +2519,6 @@ (and (x86memop addr:$src), (srl -1, (sub bitwidth, GR8:$lz))), RC, VT, DstInst, DstMemInst>; - - // x << (bitwidth - y) >> (bitwidth - y) - defm : _bmi_bzhi_pattern<(srl (shl RC:$src, (sub bitwidth, GR8:$lz)), - (sub bitwidth, GR8:$lz)), - (srl (shl (x86memop addr:$src), - (sub bitwidth, GR8:$lz)), - (sub bitwidth, GR8:$lz)), - RC, VT, DstInst, DstMemInst>; } defm : bmi_bzhi_patterns; @@ -2545,24 +2537,6 @@ def : Pat<(and (loadi64 addr:$src), (srl -1, (i8 (trunc (sub 64, GR32:$lz))))), (BZHI64rm addr:$src, (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>; - - // x << (32 - y) >> (32 - y) - def : Pat<(srl (shl GR32:$src, (i8 (trunc (sub 32, GR32:$lz)))), - (i8 (trunc (sub 32, GR32:$lz)))), - (BZHI32rr GR32:$src, GR32:$lz)>; - def : Pat<(srl (shl (loadi32 addr:$src), (i8 (trunc (sub 32, GR32:$lz)))), - (i8 (trunc (sub 32, GR32:$lz)))), - (BZHI32rm addr:$src, GR32:$lz)>; - - // x << (64 - y) >> (64 - y) - def : Pat<(srl (shl GR64:$src, (i8 (trunc (sub 64, GR32:$lz)))), - (i8 (trunc (sub 64, GR32:$lz)))), - (BZHI64rr GR64:$src, - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>; - def : Pat<(srl (shl (loadi64 addr:$src), (i8 (trunc (sub 64, GR32:$lz)))), - (i8 (trunc (sub 64, GR32:$lz)))), - (BZHI64rm addr:$src, - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>; } // HasBMI2 multiclass bmi_pdep_pext