diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/DataLayout.h"
@@ -327,11 +328,43 @@
   return false;
 }
 
+/// Replace a sign extension with a zero extension when demanded-bits
+/// analysis shows that none of the extended high bits are ever used.
+///
+/// \param I  Candidate instruction; anything but a SExtInst is left alone.
+/// \param DB Demanded-bits analysis queried for \p I.
+/// \returns true if a zext was created and took over all uses of \p I.
+///
+/// The original sext is not erased here; it is left without uses.
+static bool SExtToZExt(Instruction &I, DemandedBits &DB) {
+  auto *SE = dyn_cast<SExtInst>(&I);
+  // Skip non-sext instructions, and a sext that nothing reads.
+  if (!SE || SE->use_empty())
+    return false;
+
+  const APInt Demanded = DB.getDemandedBits(&I);
+  Type *const DstTy = SE->getDestTy();
+  const uint32_t SrcBitSize = SE->getSrcTy()->getScalarSizeInBits();
+  const uint32_t DestBitSize = DstTy->getScalarSizeInBits();
+  // sext and zext can only differ in the top (DestBitSize - SrcBitSize) bits,
+  // so the swap is invisible when no user demands any of them.
+  if (Demanded.countLeadingZeros() < DestBitSize - SrcBitSize)
+    return false;
+
+  // NOTE(review): users may carry poison-generating flags (nsw/nuw) that only
+  // held for the sign-extended value -- confirm whether they must be dropped.
+  IRBuilder<> Builder(&I);
+  I.replaceAllUsesWith(Builder.CreateZExt(SE->getOperand(0), DstTy));
+  return true;
+}
+
 /// This is the entry point for folds that could be implemented in regular
 /// InstCombine, but they are separated because they are not expected to
 /// occur frequently and/or have more than a constant-length pattern match.
 static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
   bool MadeChange = false;
+  AssumptionCache AC(F);
+  DemandedBits DB(F, AC, DT);
   for (BasicBlock &BB : F) {
     // Ignore unreachable basic blocks.
     if (!DT.isReachableFromEntry(&BB))
@@ -345,7 +378,8 @@
     for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
       MadeChange |= foldAnyOrAllBitsSet(I);
       MadeChange |= foldGuardedRotateToFunnelShift(I);
-      MadeChange |= tryToRecognizePopCount(I);
+      MadeChange |= tryToRecognizePopCount(I);
+      MadeChange |= SExtToZExt(I, DB);
     }
   }
 
diff --git a/llvm/test/Transforms/AggressiveInstCombine/sext_multi_uses.ll b/llvm/test/Transforms/AggressiveInstCombine/sext_multi_uses.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/sext_multi_uses.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -o - -aggressive-instcombine -dce -S %s | FileCheck %s
+define i32 @ZEXT_0(i16 %a) {
+; CHECK-LABEL: @ZEXT_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[TMP0]], 65280
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[TMP0]], 8
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %and = and i32 %ext, 65280
+  %lsr = lshr i32 %ext, 8
+  %and2 = and i32 %lsr, 255
+  %or = or i32 %and, %and2
+  ret i32 %or
+}
+
+define i32 @ZEXT_1(i16 %a) {
+; CHECK-LABEL: @ZEXT_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[TMP0]], 8
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[AND:%.*]] = or i32 [[TMP0]], -65536
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %lsr = lshr i32 %ext, 8
+  %and2 = and i32 %lsr, 255
+  %and = or i32 %ext, 4294901760
+  %or = or i32 %and, %and2
+  ret i32 %or
+}
+
+define i16 @NOT_ZEXT_0(i16 %a) {
+; CHECK-LABEL: @NOT_ZEXT_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[EXT]], 65280
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[EXT]], 9
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i16
+; CHECK-NEXT:    ret i16 [[RET]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %and = and i32 %ext, 65280
+  %lsr = lshr i32 %ext, 9
+  %and2 = and i32 %lsr, 255
+  %or = or i32 %and, %and2
+  %ret = trunc i32 %or to i16
+  ret i16 %ret
+}
+
+define i32 @NOT_ZEXT_1(i16 %a) {
+; CHECK-LABEL: @NOT_ZEXT_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[EXT]], 85280
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[EXT]], 8
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %and = and i32 %ext, 85280
+  %lsr = lshr i32 %ext, 8
+  %and2 = and i32 %lsr, 255
+  %or = or i32 %and, %and2
+  ret i32 %or
+}
+
+define i32 @NOT_ZEXT_2(i16 %a) {
+; CHECK-LABEL: @NOT_ZEXT_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[EXT]], 8
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[EXT]], -65536
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %lsr = lshr i32 %ext, 8
+  %and2 = and i32 %lsr, 255
+  %and = xor i32 %ext, 4294901760
+  %or = or i32 %and, %and2
+  ret i32 %or
+}
+