diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2046,29 +2046,18 @@ Op1 = Ext->getOperand(0); // (A | B) | C and A | (B | C) -> bswap if possible. - bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) || - match(Op1, m_Or(m_Value(), m_Value())); - - // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. - bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) && - match(Op1, m_LogicalShift(m_Value(), m_Value())); - - // (A & B) | (C & D) -> bswap if possible. - bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) && - match(Op1, m_And(m_Value(), m_Value())); - - // (A << B) | (C & D) -> bswap if possible. - // The bigger pattern here is ((A & C1) << C2) | ((B >> C2) & C1), which is a - // part of the bswap idiom for specific values of C1, C2 (e.g. C1 = 16711935, - // C2 = 8 for i32). - // This pattern can occur when the operands of the 'or' are not canonicalized - // for some reason (not having only one use, for example). - bool OrOfAndAndSh = (match(Op0, m_LogicalShift(m_Value(), m_Value())) && - match(Op1, m_And(m_Value(), m_Value()))) || - (match(Op0, m_And(m_Value(), m_Value())) && - match(Op1, m_LogicalShift(m_Value(), m_Value()))); - - if (!OrOfOrs && !OrOfShifts && !OrOfAnds && !OrOfAndAndSh) + bool OrWithOrs = match(Op0, m_Or(m_Value(), m_Value())) || + match(Op1, m_Or(m_Value(), m_Value())); + + // (A >> B) | C and (A << B) | C -> bswap if possible. + bool OrWithShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) || + match(Op1, m_LogicalShift(m_Value(), m_Value())); + + // (A & B) | C and A | (B & C) -> bswap if possible. 
+ bool OrWithAnds = match(Op0, m_And(m_Value(), m_Value())) || + match(Op1, m_And(m_Value(), m_Value())); + + if (!OrWithOrs && !OrWithShifts && !OrWithAnds) return nullptr; SmallVector Insts; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2940,6 +2940,24 @@ return Result; } + // BSWAP - most likely due to us previously matching a partial bswap. + if (match(V, m_BSwap(m_Value(X)))) { + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + if (!Res) + return Result; + + unsigned ByteWidth = BitWidth / 8; + Result = BitPart(Res->Provider, BitWidth); + for (unsigned ByteIdx = 0; ByteIdx < ByteWidth; ++ByteIdx) { + unsigned ByteBitOfs = ByteIdx * 8; + for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx) + Result->Provenance[(BitWidth - 8 - ByteBitOfs) + BitIdx] = + Res->Provenance[ByteBitOfs + BitIdx]; + } + return Result; + } + // Funnel 'double' shifts take 3 operands, 2 inputs and the shift // amount (modulo). // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) @@ -3029,13 +3047,22 @@ DemandedTy = IntegerType::get(I->getContext(), BitProvenance.size()); } + // Check BitProvenance hasn't found a source larger than the result type. + unsigned DemandedBW = DemandedTy->getBitWidth(); + if (DemandedBW > ITy->getBitWidth()) + return false; + // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. 
- unsigned DemandedBW = DemandedTy->getBitWidth(); + APInt DemandedMask = APInt::getAllOnesValue(DemandedBW); bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0; bool OKForBitReverse = MatchBitReversals; for (unsigned BitIdx = 0; (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) { + if (BitProvenance[BitIdx] == BitPart::Unset) { + DemandedMask.clearBit(BitIdx); + continue; + } OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx, DemandedBW); OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx], @@ -3061,12 +3088,18 @@ Provider = Trunc; } - auto *CI = CallInst::Create(F, Provider, "rev", I); - InsertedInsts.push_back(CI); + Instruction *Result = CallInst::Create(F, Provider, "rev", I); + InsertedInsts.push_back(Result); + + if (!DemandedMask.isAllOnesValue()) { + auto *Mask = ConstantInt::get(DemandedTy, DemandedMask); + Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I); + InsertedInsts.push_back(Result); + } // We may need to zeroextend back to the result type. 
- if (ITy != CI->getType()) { - auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I); + if (ITy != Result->getType()) { + auto *ExtInst = CastInst::Create(Instruction::ZExt, Result, ITy, "zext", I); InsertedInsts.push_back(ExtInst); } diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -534,14 +534,8 @@ define i32 @partial_bswap(i32 %x) { ; CHECK-LABEL: @partial_bswap( -; CHECK-NEXT: [[X3:%.*]] = shl i32 [[X:%.*]], 24 -; CHECK-NEXT: [[A2:%.*]] = shl i32 [[X]], 8 -; CHECK-NEXT: [[X2:%.*]] = and i32 [[A2]], 16711680 -; CHECK-NEXT: [[X32:%.*]] = or i32 [[X3]], [[X2]] -; CHECK-NEXT: [[T1:%.*]] = and i32 [[X]], -65536 -; CHECK-NEXT: [[T2:%.*]] = call i32 @llvm.bswap.i32(i32 [[T1]]) -; CHECK-NEXT: [[R:%.*]] = or i32 [[X32]], [[T2]] -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %x3 = shl i32 %x, 24 %a2 = shl i32 %x, 8 @@ -578,10 +572,9 @@ define i64 @bswap_and_mask_0(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_0( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: ret i64 [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037927681 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 @@ -606,13 +599,9 @@ define i64 @bswap_and_mask_2(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_2( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 71776119061217280 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], 
[[TMP6]] -; CHECK-NEXT: ret i64 [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037862401 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 @@ -735,28 +724,8 @@ define i64 @PR47191_problem1(i64 %0) { ; CHECK-LABEL: @PR47191_problem1( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 280375465082880 -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP4]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP6]] -; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[TMP8]] -; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], [[TMP11]] -; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[TMP13]] -; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -784,28 +753,8 @@ define i64 @PR47191_problem2(i64 %0) { ; CHECK-LABEL: @PR47191_problem2( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: 
[[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 -; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] -; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -833,28 +782,8 @@ define i64 @PR47191_problem3(i64 %0) { ; CHECK-LABEL: @PR47191_problem3( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] -; CHECK-NEXT: [[TMP14:%.*]] = or i64 
[[TMP13]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 -; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] -; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -882,28 +811,8 @@ define i64 @PR47191_problem4(i64 %0) { ; CHECK-LABEL: @PR47191_problem4( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 65280 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 71776119061217280 -; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP12:%.*]] = and i64 [[TMP11]], 16711680 -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP10]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 280375465082880 -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP13]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 4278190080 -; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] -; CHECK-NEXT: ret i64 [[TMP22]] +; 
CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56