diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1215,6 +1215,21 @@ Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; + KnownBits Known = computeKnownBits(IIOperand, 0, II); + uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8); + uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8); + + // bswap(x) -> shift(x) if x has exactly one "active byte" + if (Known.getBitWidth() - LZ - TZ == 8) { + assert(LZ != TZ && "active byte cannot be in the middle"); + if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x + return BinaryOperator::CreateNUWShl( + IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ)); + // -> lshr(x) if the "active byte" is in the high part of x + return BinaryOperator::CreateExactLShr( + IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ)); + } + // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { unsigned C = X->getType()->getScalarSizeInBits() - diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll --- a/llvm/test/Transforms/InstCombine/bswap-fold.ll +++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll @@ -358,9 +358,8 @@ define i64 @bs_active_high8(i64 %0) { ; CHECK-LABEL: @bs_active_high8( -; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255 +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = shl i64 %0, 56 %3 = call i64 @llvm.bswap.i64(i64 %2) @@ -369,8 +368,8 @@ define i32 @bs_active_high7(i32 %0) { ; CHECK-LABEL: @bs_active_high7( -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], -33554432 -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 24 +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 254 ; CHECK-NEXT: ret i32 [[TMP3]] ; %2 = and i32 %0, -33554432 ; 0xfe000000 @@ -380,8 +379,8 @@ define <2 x i64> @bs_active_high4(<2 x i64> %0) { ; CHECK-LABEL: @bs_active_high4( -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]]) +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], +; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %2 = shl <2 x i64> %0, @@ -392,7 +391,7 @@ define <2 x i64> @bs_active_high_different(<2 x i64> %0) { ; CHECK-LABEL: @bs_active_high_different( ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %2 = shl <2 x i64> %0, @@ -427,7 +426,7 @@ define i64 @bs_active_high8_multiuse(i64 %0) { ; CHECK-LABEL: @bs_active_high8_multiuse( ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56 -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 255 ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret i64 [[TMP4]] ; @@ -440,7 +439,7 @@ define i64 @bs_active_high7_multiuse(i64 %0) { ; CHECK-LABEL: @bs_active_high7_multiuse( ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 57 -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 56 ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret i64 [[TMP4]] ; @@ -452,8 +451,8 @@ define i64 @bs_active_byte_6h(i64 %0) { ; CHECK-LABEL: @bs_active_byte_6h( -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 280375465082880 -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 24 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 16711680 ; CHECK-NEXT: ret i64 [[TMP3]] ; %2 = and i64 %0, 280375465082880 ; 0xff00'00000000 @@ -463,8 +462,8 @@ define i32 @bs_active_byte_3h(i32 %0) { ; CHECK-LABEL: @bs_active_byte_3h( -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 393216 -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1536 ; CHECK-NEXT: ret i32 [[TMP3]] ; %2 = and i32 %0, 393216 ; 0x0006'0000 @@ -475,7 +474,7 @@ define <2 x i32> @bs_active_byte_3h_v2(<2 x i32> %0) { ; CHECK-LABEL: @bs_active_byte_3h_v2( ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %2 = and <2 x i32> %0, ; 0x0080'0000, 0x0001'0000 @@ -498,8 +497,8 @@ define i16 @bs_active_low1(i16 %0) { ; CHECK-LABEL: @bs_active_low1( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 15 -; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]]) +; CHECK-NEXT: [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 7 +; CHECK-NEXT: [[TMP3:%.*]] = and i16 [[TMP2]], 256 ; CHECK-NEXT: ret i16 [[TMP3]] ; %2 = lshr i16 %0, 15 @@ -509,9 +508,8 @@ define <2 x i32> @bs_active_low8(<2 x i32> %0) { ; CHECK-LABEL: @bs_active_low8( -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]]) -; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %2 = and <2 x i32> %0, %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2) @@ -521,7 +519,7 @@ define <2 x i32> @bs_active_low_different(<2 x i32> %0) { ; CHECK-LABEL: @bs_active_low_different( ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %2 = and <2 x i32> %0, @@ -556,7 +554,7 @@ define i64 @bs_active_low8_multiuse(i64 %0) { ; CHECK-LABEL: @bs_active_low8_multiuse( ; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255 -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 56 ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret i64 [[TMP4]] ; @@ -569,7 +567,7 @@ define i64 @bs_active_low7_multiuse(i64 %0) { ; CHECK-LABEL: @bs_active_low7_multiuse( ; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 127 -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 56 ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret i64 [[TMP4]] ; @@ -581,8 +579,8 @@ define i64 @bs_active_byte_4l(i64 %0) { ; CHECK-LABEL: @bs_active_byte_4l( -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 1140850688 -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 292057776128 ; CHECK-NEXT: ret i64 [[TMP3]] ; %2 = and i64 %0, 1140850688 ; 0x44000000 @@ -592,8 +590,8 @@ define i32 @bs_active_byte_2l(i32 %0) { ; CHECK-LABEL: @bs_active_byte_2l( -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 65280 -; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 16711680 ; CHECK-NEXT: ret i32 [[TMP3]] ; %2 = and i32 %0, 65280 ; 0xff00 @@ -604,7 +602,7 @@ define <2 x i64> @bs_active_byte_2l_v2(<2 x i64> %0) { ; CHECK-LABEL: @bs_active_byte_2l_v2( ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[TMP0:%.*]], -; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %2 = and <2 x i64> %0, ; 0x0100, 0xff00