diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3010,26 +3010,35 @@ IntegerType *ITy = dyn_cast(I->getType()); if (!ITy || ITy->getBitWidth() > 128) return false; // Can't do vectors or integers > 128 bits. - unsigned BW = ITy->getBitWidth(); - unsigned DemandedBW = BW; IntegerType *DemandedTy = ITy; - if (I->hasOneUse()) { - if (TruncInst *Trunc = dyn_cast(I->user_back())) { + if (I->hasOneUse()) + if (auto *Trunc = dyn_cast(I->user_back())) DemandedTy = cast(Trunc->getType()); - DemandedBW = DemandedTy->getBitWidth(); - } - } // Try to find all the pieces corresponding to the bswap. std::map> BPS; auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0); if (!Res) return false; - auto &BitProvenance = Res->Provenance; + ArrayRef BitProvenance = Res->Provenance; + + // If the upper half is zero, then attempt to perform as a truncated op. + if (isPowerOf2_32(BitProvenance.size())) { + // Keep halving the demanded width while the upper half is zero. + while (BitProvenance.size() > 2) { + unsigned HalfBW = BitProvenance.size() / 2; + ArrayRef UpperBits = BitProvenance.slice(HalfBW); + if (!llvm::all_of(UpperBits, [](int8_t x) { return x < 0; })) + break; + BitProvenance = BitProvenance.slice(0, HalfBW); + } + DemandedTy = IntegerType::get(I->getContext(), BitProvenance.size()); + } // Now, is the bit permutation correct for a bswap or a bitreverse? We can // only byteswap values with an even number of bytes. + unsigned DemandedBW = DemandedTy->getBitWidth(); bool OKForBSwap = DemandedBW % 16 == 0, OKForBitReverse = true; for (unsigned i = 0; i < DemandedBW; ++i) { OKForBSwap &= diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -187,8 +187,8 @@ define i16 @test8(i16 %a) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) -; CHECK-NEXT: ret i16 [[REV]] +; CHECK-NEXT: [[OR:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) +; CHECK-NEXT: ret i16 [[OR]] ; %conv = zext i16 %a to i32 %shr = lshr i16 %a, 8 @@ -201,8 +201,8 @@ define i16 @test9(i16 %a) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) -; CHECK-NEXT: ret i16 [[REV]] +; CHECK-NEXT: [[OR:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) +; CHECK-NEXT: ret i16 [[OR]] ; %conv = zext i16 %a to i32 %shr = lshr i32 %conv, 8 @@ -229,18 +229,10 @@ define i64 @PR39793_bswap_u64_as_u32(i64 %0) { ; CHECK-LABEL: @PR39793_bswap_u64_as_u32( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 24 -; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 -; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65280 -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 16711680 -; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP0]], 24 -; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 4278190080 -; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP9]], [[TMP11]] -; CHECK-NEXT: ret i64 [[TMP12]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[TMP0:%.*]] to i32 +; CHECK-NEXT: [[REV:%.*]] = call i32 @llvm.bswap.i32(i32 [[TRUNC]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[REV]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 24 %3 = and i64 %2, 255 @@ -258,13 +250,10 @@ define i16 @PR39793_bswap_u64_as_u32_trunc(i64 %0) { ; CHECK-LABEL: @PR39793_bswap_u64_as_u32_trunc( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 24 -; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 -; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65280 -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i16 -; CHECK-NEXT: ret i16 [[TMP7]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[TMP0:%.*]] to i32 +; CHECK-NEXT: [[REV:%.*]] = call i32 @llvm.bswap.i32(i32 [[TRUNC]]) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[REV]] to i16 +; CHECK-NEXT: ret i16 [[TMP2]] ; %2 = lshr i64 %0, 24 %3 = and i64 %2, 255 @@ -283,12 +272,10 @@ define i64 @PR39793_bswap_u64_as_u16(i64 %0) { ; CHECK-LABEL: @PR39793_bswap_u64_as_u16( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 255 -; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65280 -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]] -; CHECK-NEXT: ret i64 [[TMP6]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[TMP0:%.*]] to i16 +; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[TRUNC]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[REV]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %2 = lshr i64 %0, 8 %3 = and i64 %2, 255 @@ -300,9 +287,9 @@ define i8 @PR39793_bswap_u64_as_u16_trunc(i64 %0) { ; CHECK-LABEL: @PR39793_bswap_u64_as_u16_trunc( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i8 -; CHECK-NEXT: ret i8 [[TMP3]] +; CHECK-NEXT: [[REV1:%.*]] = lshr i64 [[TMP0:%.*]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[REV1]] to i8 +; CHECK-NEXT: ret i8 [[TMP2]] ; %2 = lshr i64 %0, 8 %3 = and i64 %2, 255 @@ -315,12 +302,10 @@ define i32 @PR39793_bswap_u32_as_u16(i32 %0) { ; CHECK-LABEL: @PR39793_bswap_u32_as_u16( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 255 -; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP0]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 65280 -; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP3]], [[TMP5]] -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[TMP0:%.*]] to i16 +; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[TRUNC]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[REV]] to i32 +; CHECK-NEXT: ret i32 [[TMP2]] ; %2 = lshr i32 %0, 8 %3 = and i32 %2, 255 @@ -332,9 +317,9 @@ define i8 @PR39793_bswap_u32_as_u16_trunc(i32 %0) { ; CHECK-LABEL: @PR39793_bswap_u32_as_u16_trunc( -; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 -; CHECK-NEXT: ret i8 [[TMP3]] +; CHECK-NEXT: [[REV1:%.*]] = lshr i32 [[TMP0:%.*]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[REV1]] to i8 +; CHECK-NEXT: ret i8 [[TMP2]] ; %2 = lshr i32 %0, 8 %3 = and i32 %2, 255