Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1724,17 +1724,25 @@ return nullptr; // Check to see if all of the bits come from the same value. - Value *V = BitValues[0]; - if (!V) return nullptr; // Didn't find a bit? Must be zero. - - if (!std::all_of(BitValues.begin(), BitValues.end(), - [&](const Value *X) { return X == V; })) + Value *V = nullptr; + for (unsigned I = 0, E = BitValues.size(); I != E; ++I) { + if (BitValues[I] && !V) + V = BitValues[I]; + else if (BitProvenance[I] != -1 && BitValues[I] != V) + return nullptr; + } + if (!V) return nullptr; - + // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. - bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;; - for (unsigned i = 0, e = BitValues.size(); i != e; ++i) { + bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true; + APInt IgnoreMask(BW, 0); + for (int i = 0, e = BitValues.size(); i != e; ++i) { + if (BitProvenance[i] == -1 || BitProvenance[i] == i) { + IgnoreMask.setBit(i); + continue; + } OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW); OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW); @@ -1747,10 +1755,45 @@ Intrin = Intrinsic::bitreverse; else return nullptr; + + // If we only have a partial match (IgnoreMask != 0), it's possible that we + // started looking at the interior of a larger pattern. Instead of being + // greedy and taking the partial match, see if adding an exterior node would + // improve the match and if so bail out. + if (IgnoreMask.getBoolValue() && + Operator::getOpcode(&I) == Instruction::Or && + I.hasOneUse() && + Operator::getOpcode(*I.user_begin()) == Instruction::Or) { + auto *Parent = *I.user_begin(); + auto *OtherNode = (Parent->getOperand(0) == &I) ? Parent->getOperand(1) + : Parent->getOperand(0); + SmallVector NewBitValues(BW, nullptr); + SmallVector NewBitProvenance(BW, -1); + APInt NewBitMask = APInt::getAllOnesValue(NewBitValues.size()); + if (isa(OtherNode)) { + bool OK = !CollectBitParts(OtherNode, 0, BitMask, BitValues, BitProvenance); + if (OK) + return nullptr; + } + } + + // We know we can create a bswap or bitreverse out of this. But is it worth + // it? Heuristically let's say it's worth it if we're setting at least half + // of the bits of the resulting value from the bitreverse/bswap. + if (IgnoreMask.countPopulation() > BW / 2) + return nullptr; Module *M = I.getParent()->getParent()->getParent(); Function *F = Intrinsic::getDeclaration(M, Intrin, ITy); - return CallInst::Create(F, V); + auto *CI = CallInst::Create(F, V); + + if (!IgnoreMask.getBoolValue()) + return CI; + + auto *And = BinaryOperator::Create(Instruction::And, CI, + ConstantInt::get(ITy, ~IgnoreMask)); + CI->insertBefore(&I); + return And; } /// We have an expression of the form (A&C)|(B&D). Check if A is (cond?-1:0) Index: test/Transforms/InstCombine/bitreverse-partial.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/bitreverse-partial.ll @@ -0,0 +1,37 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +; CHECK-LABEL: @partial_i4 +; CHECK: %[[x:.*]] = call i4 @llvm.bitreverse.i4(i4 %a) +; CHECK: %[[y:.*]] = and i4 %[[x]], -2 +; CHECK: ret i4 %[[y]] +define i4 @partial_i4(i4 %a) #0 { +entry: + %and = shl i4 %a, 3 + %0 = and i4 %and, 8 + %and1 = shl i4 %a, 1 + %1 = and i4 %and1, 4 + %2 = or i4 %1, %0 + %and6 = lshr i4 %a, 1 + %3 = and i4 %and6, 2 + %b.1 = or i4 %2, %3 + ret i4 %b.1 +} + +; CHECK-LABEL: @partial_i4_or_2 +; CHECK: %[[x:.*]] = call i4 @llvm.bitreverse.i4(i4 %a) +; CHECK: %[[y:.*]] = and i4 %[[x]], -4 +; CHECK: %[[z:.*]] = or i4 %[[y]], 2 +; CHECK: ret i4 %[[z]] +define i4 @partial_i4_or_2(i4 %a) #0 { +entry: + %and = shl i4 %a, 3 + %0 = and i4 %and, 8 + %and1 = shl i4 %a, 1 + %1 = and i4 %and1, 4 + %2 = or i4 %1, %0 + %and6 = lshr i4 %a, 1 + %3 = and i4 %and6, 2 + %b.1 = or i4 %2, %3 + %z = or i4 %b.1, 2 + ret i4 %z +}