diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2940,6 +2940,21 @@ return Result; } + // BSWAP - most likely due to us previously matching a partial bswap. + if (match(V, m_BSwap(m_Value(X)))) { + const auto &Res = + collectBitParts(X, MatchBSwaps, MatchBitReversals, BPS, Depth + 1); + if (!Res) + return Result; + + Result = BitPart(Res->Provider, BitWidth); + for (unsigned ByteIdx = 0; ByteIdx < BitWidth; ByteIdx += 8) + for (unsigned BitIdx = 0; BitIdx < 8; ++BitIdx) + Result->Provenance[(BitWidth - 8 - ByteIdx) + BitIdx] = + Res->Provenance[ByteIdx + BitIdx]; + return Result; + } + // Funnel 'double' shifts take 3 operands, 2 inputs and the shift // amount (modulo). // fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) @@ -3032,10 +3047,15 @@ // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. 
unsigned DemandedBW = DemandedTy->getBitWidth(); + APInt DemandedMask = APInt::getAllOnesValue(DemandedBW); bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0; bool OKForBitReverse = MatchBitReversals; for (unsigned BitIdx = 0; (BitIdx < DemandedBW) && (OKForBSwap || OKForBitReverse); ++BitIdx) { + if (BitProvenance[BitIdx] == BitPart::Unset) { + DemandedMask.clearBit(BitIdx); + continue; + } OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[BitIdx], BitIdx, DemandedBW); OKForBitReverse &= bitTransformIsCorrectForBitReverse(BitProvenance[BitIdx], @@ -3061,12 +3081,18 @@ Provider = Trunc; } - auto *CI = CallInst::Create(F, Provider, "rev", I); - InsertedInsts.push_back(CI); + Instruction *Result = CallInst::Create(F, Provider, "rev", I); + InsertedInsts.push_back(Result); + + if (!DemandedMask.isAllOnesValue()) { + auto *Mask = ConstantInt::get(DemandedTy, DemandedMask); + Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I); + InsertedInsts.push_back(Result); + } // We may need to zeroextend back to the result type. 
- if (ITy != CI->getType()) { - auto *ExtInst = CastInst::Create(Instruction::ZExt, CI, ITy, "zext", I); + if (ITy != Result->getType()) { + auto *ExtInst = CastInst::Create(Instruction::ZExt, Result, ITy, "zext", I); InsertedInsts.push_back(ExtInst); } diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -347,10 +347,9 @@ define i64 @bswap_and_mask_0(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_0( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: ret i64 [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037927681 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 @@ -375,13 +374,12 @@ define i64 @bswap_and_mask_2(i64 %0) { ; CHECK-LABEL: @bswap_and_mask_2( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 71776119061217280 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]] -; CHECK-NEXT: ret i64 [[TMP7]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037927681 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 71776119061217280 +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]] +; CHECK-NEXT: ret i64 [[TMP6]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56 @@ -504,28 +502,8 @@ define i64 @PR47191_problem1(i64 %0) { ; CHECK-LABEL: @PR47191_problem1( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 
[[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 280375465082880 -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP4]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP6]] -; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[TMP8]] -; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], [[TMP11]] -; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[TMP13]] -; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -553,28 +531,8 @@ define i64 @PR47191_problem2(i64 %0) { ; CHECK-LABEL: @PR47191_problem2( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: 
[[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 -; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] -; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -602,28 +560,8 @@ define i64 @PR47191_problem3(i64 %0) { ; CHECK-LABEL: @PR47191_problem3( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 16711680 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4278190080 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 71776119061217280 -; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP2]] -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP12]], [[TMP4]] -; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP13]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[TMP8]] -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP11]] -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 280375465082880 -; CHECK-NEXT: [[TMP19:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 1095216660480 -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], [[TMP18]] -; CHECK-NEXT: 
[[TMP22:%.*]] = xor i64 [[TMP21]], [[TMP16]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = lshr i64 %0, 40 @@ -651,28 +589,8 @@ define i64 @PR47191_problem4(i64 %0) { ; CHECK-LABEL: @PR47191_problem4( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 56 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], 65280 -; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP0]], 40 -; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 71776119061217280 -; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP12:%.*]] = and i64 [[TMP11]], 16711680 -; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[TMP10]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 280375465082880 -; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP13]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 4278190080 -; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 1095216660480 -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[TMP19]], [[TMP21]] -; CHECK-NEXT: ret i64 [[TMP22]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 56 %3 = shl i64 %0, 56