diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2046,29 +2046,18 @@ Op1 = Ext->getOperand(0); // (A | B) | C and A | (B | C) -> bswap if possible. - bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) || - match(Op1, m_Or(m_Value(), m_Value())); + bool OrWithOrs = match(Op0, m_Or(m_Value(), m_Value())) || + match(Op1, m_Or(m_Value(), m_Value())); // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. - bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) && - match(Op1, m_LogicalShift(m_Value(), m_Value())); + bool OrWithShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) || + match(Op1, m_LogicalShift(m_Value(), m_Value())); // (A & B) | (C & D) -> bswap if possible. - bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) && - match(Op1, m_And(m_Value(), m_Value())); - - // (A << B) | (C & D) -> bswap if possible. - // The bigger pattern here is ((A & C1) << C2) | ((B >> C2) & C1), which is a - // part of the bswap idiom for specific values of C1, C2 (e.g. C1 = 16711935, - // C2 = 8 for i32). - // This pattern can occur when the operands of the 'or' are not canonicalized - // for some reason (not having only one use, for example). 
- bool OrOfAndAndSh = (match(Op0, m_LogicalShift(m_Value(), m_Value())) && - match(Op1, m_And(m_Value(), m_Value()))) || - (match(Op0, m_And(m_Value(), m_Value())) && - match(Op1, m_LogicalShift(m_Value(), m_Value()))); - - if (!OrOfOrs && !OrOfShifts && !OrOfAnds && !OrOfAndAndSh) + bool OrWithAnds = match(Op0, m_And(m_Value(), m_Value())) || + match(Op1, m_And(m_Value(), m_Value())); + + if (!OrWithOrs && !OrWithShifts && !OrWithAnds) return nullptr; SmallVector Insts; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2940,6 +2940,24 @@ return Result; } + // BSWAP - most likely due to us previously matching a partial bswap. + if (match(V, m_BSwap(m_Value(X)))) { + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + if (!Res) + return Result; + + unsigned ByteWidth = BitWidth / 8; + Result = BitPart(Res->Provider, BitWidth); + for (unsigned ByteIdx = 0; ByteIdx < ByteWidth; ++ByteIdx) { + unsigned ByteBitOfs = ByteIdx * 8; + for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx) + Result->Provenance[(BitWidth - 8 - ByteBitOfs) + BitIdx] = + Res->Provenance[ByteBitOfs + BitIdx]; + } + return Result; + } + // Funnel 'double' shifts take 3 operands, 2 inputs and the shift // amount (modulo). // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) @@ -3032,10 +3050,15 @@ // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. 
unsigned DemandedBW = DemandedTy->getBitWidth(); + APInt DemandedMask = APInt::getAllOnesValue(DemandedBW); bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0; bool OKForBitReverse = MatchBitReversals; for (unsigned BitIdx = 0; (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) { + if (BitProvenance[BitIdx] == BitPart::Unset) { + DemandedMask.clearBit(BitIdx); + continue; + } OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx, DemandedBW); OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx], @@ -3061,12 +3084,18 @@ Provider = Trunc; } - auto *CI = CallInst::Create(F, Provider, "rev", I); - InsertedInsts.push_back(CI); + Instruction *Result = CallInst::Create(F, Provider, "rev", I); + InsertedInsts.push_back(Result); + + if (!DemandedMask.isAllOnesValue()) { + auto *Mask = ConstantInt::get(DemandedTy, DemandedMask); + Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I); + InsertedInsts.push_back(Result); + } // We may need to zeroextend back to the result type. 
- if (ITy != CI->getType()) { - auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I); + if (ITy != Result->getType()) { + auto *ExtInst = CastInst::Create(Instruction::ZExt, Result, ITy, "zext", I); InsertedInsts.push_back(ExtInst); } diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -345,12 +345,27 @@ ret i8 %7 } +define i32 @partial_bswap(i32 %x) { +; CHECK-LABEL: @partial_bswap( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] +; + %x3 = shl i32 %x, 24 + %a2 = shl i32 %x, 8 + %x2 = and i32 %a2, 16711680 + %x32 = or i32 %x3, %x2 + %t1 = and i32 %x, -65536 + %t2 = call i32 @llvm.bswap.i32(i32 %t1) + %r = or i32 %x32, %t2 + ret i32 %r +} +declare i32 @llvm.bswap.i32(i32) + define i64 @bswap_and_mask_0(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_0( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: ret i64 [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037927681 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 @@ -375,13 +390,9 @@ define i64 @bswap_and_mask_2(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_2( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 71776119061217280 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]] -; CHECK-NEXT: ret i64 [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037862401 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; 
CHECK-NEXT: ret i64 [[TMP3]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 @@ -504,28 +515,8 @@ define i64 @PR47191_problem1(i64 %0) { ; CHECK-LABEL: @PR47191_problem1( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 280375465082880 -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP4]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP6]] -; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[TMP8]] -; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], [[TMP11]] -; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[TMP13]] -; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -553,28 +544,8 @@ define i64 @PR47191_problem2(i64 %0) { ; CHECK-LABEL: @PR47191_problem2( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 
[[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 -; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] -; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -602,28 +573,8 @@ define i64 @PR47191_problem3(i64 %0) { ; CHECK-LABEL: @PR47191_problem3( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 
[[TMP0]], 24 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 -; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] -; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -651,28 +602,8 @@ define i64 @PR47191_problem4(i64 %0) { ; CHECK-LABEL: @PR47191_problem4( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 65280 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 71776119061217280 -; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP12:%.*]] = and i64 [[TMP11]], 16711680 -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP10]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 280375465082880 -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP13]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 4278190080 -; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56