Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1597,6 +1597,29 @@ SmallVectorImpl &BitValues, SmallVectorImpl &BitProvenance) { if (Instruction *I = dyn_cast(V)) { + // If this is a bitreverse intrinsic, it can obviously be part of a + // bitreverse/bswap. + // FIXME: Do the same for Intrinsic::bswap! + if (isa(I) && + cast(I)->getIntrinsicID() == Intrinsic::bitreverse) { + auto *V = I->getOperand(0); + // We don't have to demand a contiguous range of bits from a bitreverse. + // We know all bits will be in the correct order. + auto BW = BitMask.getBitWidth(); + unsigned DestBitNo = OverallLeftShift; + for (unsigned I = 0; I < BW - OverallLeftShift; ++I) + if (BitMask[I] && BitValues[DestBitNo + I] && + BitValues[DestBitNo + I] != V) + return true; + + for (unsigned I = 0; I < BitMask.getBitWidth() - OverallLeftShift; ++I) + if (BitMask[I]) { + BitProvenance[DestBitNo + I] = BW - I - 1; + BitValues[DestBitNo + I] = V; + } + return false; + } + // If this is an or instruction, it may be an inner node of the bswap. if (I->getOpcode() == Instruction::Or) return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, @@ -1724,17 +1747,25 @@ return nullptr; // Check to see if all of the bits come from the same value. - Value *V = BitValues[0]; - if (!V) return nullptr; // Didn't find a bit? Must be zero. - - if (!std::all_of(BitValues.begin(), BitValues.end(), - [&](const Value *X) { return X == V; })) + Value *V = nullptr; + for (unsigned I = 0, E = BitValues.size(); I != E; ++I) { + if (BitValues[I] && !V) + V = BitValues[I]; + else if (BitProvenance[I] != -1 && BitValues[I] != V) + return nullptr; + } + if (!V) return nullptr; - + // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. - bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;; - for (unsigned i = 0, e = BitValues.size(); i != e; ++i) { + bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true; + APInt IgnoreMask(BW, 0); + for (int i = 0, e = BitValues.size(); i != e; ++i) { + if (BitProvenance[i] == -1 || BitProvenance[i] == i) { + IgnoreMask.setBit(i); + continue; + } OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW); OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW); @@ -1747,10 +1778,25 @@ Intrin = Intrinsic::bitreverse; else return nullptr; + + // We know we can create a bswap or bitreverse out of this. But is it worth + // it? Heuristically let's say it's worth it if we're setting at least half + // of the bits of the resulting value from the bitreverse/bswap. + if (IgnoreMask.countPopulation() > BW / 2) + return nullptr; Module *M = I.getParent()->getParent()->getParent(); Function *F = Intrinsic::getDeclaration(M, Intrin, ITy); - return CallInst::Create(F, V); + auto *CI = CallInst::Create(F, V); + + if (!IgnoreMask.getBoolValue()) + return CI; + + auto *And = BinaryOperator::Create(Instruction::And, CI, + ConstantInt::get(ITy, ~IgnoreMask)); + Worklist.Add(CI); + CI->insertBefore(&I); + return And; } /// We have an expression of the form (A&C)|(B&D). Check if A is (cond?-1:0) Index: test/Transforms/InstCombine/bitreverse-partial.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/bitreverse-partial.ll @@ -0,0 +1,37 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +; CHECK-LABEL: @partial_i4 +; CHECK: %[[x:.*]] = call i4 @llvm.bitreverse.i4(i4 %a) +; CHECK: %[[y:.*]] = and i4 %[[x]], -2 +; CHECK: ret i4 %[[y]] +define i4 @partial_i4(i4 %a) #0 { +entry: + %and = shl i4 %a, 3 + %0 = and i4 %and, 8 + %and1 = shl i4 %a, 1 + %1 = and i4 %and1, 4 + %2 = or i4 %1, %0 + %and6 = lshr i4 %a, 1 + %3 = and i4 %and6, 2 + %b.1 = or i4 %2, %3 + ret i4 %b.1 +} + +; CHECK-LABEL: @partial_i4_or_2 +; CHECK: %[[x:.*]] = call i4 @llvm.bitreverse.i4(i4 %a) +; CHECK: %[[y:.*]] = and i4 %[[x]], -4 +; CHECK: %[[z:.*]] = or i4 %[[y]], 2 +; CHECK: ret i4 %[[z]] +define i4 @partial_i4_or_2(i4 %a) #0 { +entry: + %and = shl i4 %a, 3 + %0 = and i4 %and, 8 + %and1 = shl i4 %a, 1 + %1 = and i4 %and1, 4 + %2 = or i4 %1, %0 + %and6 = lshr i4 %a, 1 + %3 = and i4 %and6, 2 + %b.1 = or i4 %2, %3 + %z = or i4 %b.1, 2 + ret i4 %z +}