Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -8790,6 +8790,181 @@ return SDValue(); } +// Recursively walk a potential ladder of test-set-and-select operations that +// may contribute to a bit-reversal operation. +// +// if (bit I in X set) +// set bit J in Y +// +static bool CollectRBITParts(SDNode *N, SmallVectorImpl &Parts, + SDNode *&BaseVal, unsigned Depth=32) { + if (Depth == 0) + // Don't recurse too deep. + return false; + + if (N->getOpcode() == ISD::OR) { + // ORs glue the rungs of the ladder together. Just recurse through them. + return CollectRBITParts(N->getOperand(0).getNode(), Parts, BaseVal, + Depth - 1) && + CollectRBITParts(N->getOperand(1).getNode(), Parts, BaseVal, + Depth - 1); + } else if (N->getOpcode() == ISD::AND) { + + // Match an AND of an SHL and a power of two (or SHR): + // + // Y = (X << C1) & (1 << C2) + // This is an idiomatic test-and-set; test bit C2-C1 in X and if it is set, + // set (only) bit C2 in Y. + // + // Y = (X >> C1) & (1 << C2) + // This is a test-and-set of C1+C2 in X and if it is set, set bit C2 in Y. + + auto Shift = N->getOperand(0); + auto *AndN = dyn_cast(N->getOperand(1)); + if (!AndN) + return false; + + bool Left; + switch (Shift->getOpcode()) { + case ISD::SHL: + Left = true; break; + case ISD::SRL: + case ISD::SRA: + Left = false; break; + default: + return false; + } + + APInt AndConstant = AndN->getAPIntValue(); + if (!AndConstant.isPowerOf2()) + return false; + int AndBit = AndConstant.nearestLogBase2(); + + auto *ShiftN = dyn_cast(Shift->getOperand(1)); + if (!ShiftN) + return false; + int ShiftBit = ShiftN->getAPIntValue().getLimitedValue(INT32_MAX); + + // Every rung of the ladder needs to have the same base value (X in the + // above text). If we're not the first rung, ensure our base value is the + // same as previous. + SDNode *ThisBaseVal = Shift->getOperand(0).getNode(); + if (BaseVal && BaseVal != ThisBaseVal) + return false; + + // OK, this is masking and setting a bit. Now which bit, exactly? + auto OriginalBit = Left ? (AndBit - ShiftBit) : (ShiftBit + AndBit); + if (OriginalBit < 0 || OriginalBit > 31) + return false; + + Parts[AndBit] = (int)OriginalBit; + BaseVal = ThisBaseVal; + return true; + } else if (N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRL) { + + // Similar to the above but more simple for when C1 = 0 and C2 = 31: + // + // Y = (X << 31) or Y = (X >> 31) + if (!isa(N->getOperand(1).getNode())) + return false; + auto *C = cast(N->getOperand(1).getNode()); + unsigned Bit = C->getZExtValue(); + if (Bit != C->getValueSizeInBits(0) - 1) + return false; + + auto *ThisBaseVal = N->getOperand(0).getNode(); + if (BaseVal && BaseVal != ThisBaseVal) + return false; + BaseVal = ThisBaseVal; + + if (N->getOpcode() == ISD::SHL) + Parts[Bit] = 0; + else + Parts[0] = Bit; + return true; + } else if (N == BaseVal) { + // Terminate the recursion if we find the base value. + return true; + } else { + dbgs() << "Bailing on ";N->dump(); + // Fail on any other value. + return false; + } +} + +// Try and detect an RBIT - bit reversal. +static SDValue performRBITCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // For X = rbit(Y), we expect a ladder of bit manipulation operations. At + // every rung of the ladder there is a mask of some bit in X, a test of that + // bit with zero, an OR setting a bit in Y and a select depending on the test: + // + // %1 = and X, 1< Parts; + for (int I = 0; I < 64; ++I) + Parts.push_back(-1); + // BaseVal is X in the above text, but a slightly more descriptive name. + SDNode *BaseVal = nullptr; + bool B = CollectRBITParts(N, Parts, BaseVal); + + if (!B) + // No ladder found. + return SDValue(); + + int BitWidth = BaseVal->getValueSizeInBits(0); + + // OK, we have a set of bit remaps. Now check they're in a sequence. + // Ignore -1's for now. + for (int I = 0, RI = BitWidth-1; I < BitWidth; ++I,--RI) + if (Parts[I] != -1 && Parts[I] != RI) + return SDValue(); + // This can either be a pure RBIT (if none of the Parts are -1) or + // a masked combination of the original value and the reversed value. + // + // Compute the masks for each now, and decide profitability. + APInt BaseMask(BitWidth, 0); + for (int I = 0; I < BitWidth; ++I) + if (Parts[I] == -1) + BaseMask.setBit(I); + APInt RevMask = ~BaseMask; + + // Heuristic: if we're setting at least a quarter of the bits from the + // reverse, it's worthwhile doing. + if ((int)RevMask.countPopulation() < BitWidth / 4) + return SDValue(); + + SDLoc dl(N); + SDValue Base(BaseVal, 0); + SDValue Reversed = + DCI.DAG.getNode(ARMISD::RBIT, dl, BaseVal->getValueType(0), Base); + + if (RevMask.isAllOnesValue()) + // No merge required, return early. + return Reversed; + + // Merge the base value (X) and the reversed value (Y) together. + EVT VT = Base.getValueType(); + Base = DCI.DAG.getNode(ISD::AND, dl, VT, Base, + DCI.DAG.getConstant(BaseMask, dl, VT)); + Reversed = DCI.DAG.getNode(ISD::AND, dl, VT, Reversed, + DCI.DAG.getConstant(RevMask, dl, VT)); + + return DCI.DAG.getNode(ISD::OR, dl, VT, Base, Reversed); +} + /// PerformORCombine - Target-specific dag combine xforms for ISD::OR static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, @@ -8803,6 +8978,11 @@ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); + // Attempt to match a (partial) RBIT instruction + SDValue RBit = performRBITCombine(N, DCI, Subtarget); + if (RBit) + return RBit; + APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; @@ -8870,7 +9050,7 @@ } } } - + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when // reasonable. Index: test/CodeGen/ARM/rbit2.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/rbit2.ll @@ -0,0 +1,82 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7--linux-gnu" + +define i32 @f(i32 %a) { +; CHECK: lsr r1, r0, #12 +; CHECK: rbit r0, r0 +; CHECK: bfi r0, r1, #12, #8 +; CHECK: bx lr +entry: + %a.lobit = lshr i32 %a, 31 + %and1 = lshr i32 %a, 29 + %0 = and i32 %and1, 2 + %and6 = lshr i32 %a, 27 + %1 = and i32 %and6, 4 + %and11 = lshr i32 %a, 25 + %2 = and i32 %and11, 8 + %and16 = lshr i32 %a, 23 + %3 = and i32 %and16, 16 + %and21 = lshr i32 %a, 21 + %4 = and i32 %and21, 32 + %and26 = lshr i32 %a, 19 + %5 = and i32 %and26, 64 + %and31 = lshr i32 %a, 17 + %6 = and i32 %and31, 128 + %and36 = lshr i32 %a, 15 + %7 = and i32 %and36, 256 + %and41 = lshr i32 %a, 13 + %8 = and i32 %and41, 512 + %and46 = lshr i32 %a, 11 + %9 = and i32 %and46, 1024 + %and51 = lshr i32 %a, 9 + %10 = and i32 %and51, 2048 + %11 = shl i32 %a, 31 + %and61 = shl i32 %a, 29 + %12 = and i32 %and61, 1073741824 + %and66 = shl i32 %a, 27 + %13 = and i32 %and66, 536870912 + %and71 = shl i32 %a, 25 + %14 = and i32 %and71, 268435456 + %and76 = shl i32 %a, 23 + %15 = and i32 %and76, 134217728 + %and81 = shl i32 %a, 21 + %16 = and i32 %and81, 67108864 + %and86 = shl i32 %a, 19 + %17 = and i32 %and86, 33554432 + %and91 = shl i32 %a, 17 + %18 = and i32 %and91, 16777216 + %and96 = shl i32 %a, 15 + %19 = and i32 %and96, 8388608 + %and101 = shl i32 %a, 13 + %20 = and i32 %and101, 4194304 + %and106 = shl i32 %a, 11 + %21 = and i32 %and106, 2097152 + %and111 = shl i32 %a, 9 + %22 = and i32 %and111, 1048576 + %23 = or i32 %11, %a.lobit + %24 = or i32 %23, %0 + %25 = or i32 %24, %1 + %26 = or i32 %25, %2 + %27 = or i32 %26, %3 + %28 = or i32 %27, %4 + %29 = or i32 %28, %5 + %30 = or i32 %29, %6 + %31 = or i32 %30, %7 + %32 = or i32 %31, %8 + %33 = or i32 %32, %9 + %34 = or i32 %33, %10 + %35 = or i32 %34, %12 + %36 = or i32 %35, %13 + %37 = or i32 %36, %14 + %38 = or i32 %37, %15 + %39 = or i32 %38, %16 + %40 = or i32 %39, %17 + %41 = or i32 %40, %18 + %42 = or i32 %41, %19 + %43 = or i32 %42, %20 + %44 = or i32 %43, %21 + %45 = or i32 %44, %22 + ret i32 %45 +}