Index: lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
===================================================================
--- lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -18,6 +18,7 @@
 #include "llvm-c/Transforms/AggressiveInstCombine.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/DataLayout.h"
@@ -280,6 +281,50 @@
   return MadeChange;
 }
 
+/// Convert sign extensions into zero extensions when no user can observe the
+/// difference.
+///
+/// A sext and a zext of the same value differ only in the bits above the
+/// source width. DemandedBits reports which bits of each instruction are
+/// actually used; if none of those high bits is demanded, the sext is replaced
+/// by a zext of the same operand and erased.
+///
+/// \param F  the function to transform.
+/// \param DT dominator tree of \p F, needed to construct DemandedBits.
+/// \returns true if at least one instruction was replaced.
+static bool SExtToZExt(Function &F, DominatorTree &DT) {
+  bool Changes = false;
+  AssumptionCache AC(F);
+  DemandedBits DB(F, AC, DT);
+  for (BasicBlock &BB : F) {
+    // Step the iterator before visiting the instruction so that erasing the
+    // instruction below does not invalidate the traversal.
+    for (auto It = BB.begin(), End = BB.end(); It != End;) {
+      Instruction &I = *It++;
+      auto *SE = dyn_cast<SExtInst>(&I);
+      if (!SE)
+        continue;
+
+      Type *DstTy = SE->getDestTy();
+      const unsigned SrcBitSize = SE->getSrcTy()->getScalarSizeInBits();
+      const unsigned DestBitSize = DstTy->getScalarSizeInBits();
+
+      // Only the top (DestBitSize - SrcBitSize) bits depend on the kind of
+      // extension; keep the sext if any of them is demanded.
+      const APInt Demanded = DB.getDemandedBits(SE);
+      if (Demanded.countLeadingZeros() < DestBitSize - SrcBitSize)
+        continue;
+
+      // The new zext is inserted right before the sext it replaces.
+      IRBuilder<> Builder(SE);
+      SE->replaceAllUsesWith(Builder.CreateZExt(SE->getOperand(0), DstTy));
+      SE->eraseFromParent();
+      Changes = true;
+    }
+  }
+  return Changes;
+}
+
 /// This is the entry point for all transforms. Pass manager differences are
 /// handled in the callers of this function.
 static bool runImpl(Function &F, TargetLibraryInfo &TLI, DominatorTree &DT) {
@@ -288,6 +333,7 @@
   TruncInstCombine TIC(TLI, DL, DT);
   MadeChange |= TIC.run(F);
   MadeChange |= foldUnusualPatterns(F, DT);
+  MadeChange |= SExtToZExt(F, DT);
   return MadeChange;
 }
 
Index: test/Transforms/AggressiveInstCombine/sext_multi_uses.ll
===================================================================
--- /dev/null
+++ test/Transforms/AggressiveInstCombine/sext_multi_uses.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -o - -aggressive-instcombine -dce -S %s | FileCheck %s
+
+; Only bits 8-15 of %ext are used (mask 65280; lshr 8 + mask 255): expect zext.
+define i32 @ZEXT_0(i16 %a) {
+; CHECK-LABEL: @ZEXT_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[TMP0]], 65280
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[TMP0]], 8
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %and = and i32 %ext, 65280
+  %lsr = lshr i32 %ext, 8
+  %and2 = and i32 %lsr, 255
+  %or = or i32 %and, %and2
+  ret i32 %or
+}
+
+; The 'or' with 0xFFFF0000 overwrites the extended bits of %ext: expect zext.
+define i32 @ZEXT_1(i16 %a) {
+; CHECK-LABEL: @ZEXT_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[TMP0]], 8
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[AND:%.*]] = or i32 [[TMP0]], -65536
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %lsr = lshr i32 %ext, 8
+  %and2 = and i32 %lsr, 255
+  %and = or i32 %ext, 4294901760
+  %or = or i32 %and, %and2
+  ret i32 %or
+}
+
+; lshr 9 + mask 255 reads bits 9-16, and bit 16 is a sign bit: sext must stay.
+define i16 @NOT_ZEXT_0(i16 %a) {
+; CHECK-LABEL: @NOT_ZEXT_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[EXT]], 65280
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[EXT]], 9
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i16
+; CHECK-NEXT:    ret i16 [[RET]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %and = and i32 %ext, 65280
+  %lsr = lshr i32 %ext, 9
+  %and2 = and i32 %lsr, 255
+  %or = or i32 %and, %and2
+  %ret = trunc i32 %or to i16
+  ret i16 %ret
+}
+
+; Mask 85280 (0x14D20) reads bit 16, which is a sign bit: sext must stay.
+define i32 @NOT_ZEXT_1(i16 %a) {
+; CHECK-LABEL: @NOT_ZEXT_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[EXT]], 85280
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[EXT]], 8
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %and = and i32 %ext, 85280
+  %lsr = lshr i32 %ext, 8
+  %and2 = and i32 %lsr, 255
+  %or = or i32 %and, %and2
+  ret i32 %or
+}
+
+; The 'xor' with 0xFFFF0000 depends on the extended bits: sext must stay.
+define i32 @NOT_ZEXT_2(i16 %a) {
+; CHECK-LABEL: @NOT_ZEXT_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[EXT]], 8
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[EXT]], -65536
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %lsr = lshr i32 %ext, 8
+  %and2 = and i32 %lsr, 255
+  %and = xor i32 %ext, 4294901760
+  %or = or i32 %and, %and2
+  ret i32 %or
+}
+