diff --git a/llvm/lib/Transforms/Scalar/BDCE.cpp b/llvm/lib/Transforms/Scalar/BDCE.cpp
--- a/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -9,7 +9,8 @@
 // This file implements the Bit-Tracking Dead Code Elimination pass. Some
 // instructions (shifts, some ands, ors, etc.) kill some of their input bits.
 // We track these dead bits and remove instructions that compute only these
-// dead bits.
+// dead bits. We also simplify sext that generates unused extension bits,
+// converting it to a zext.
 //
 //===----------------------------------------------------------------------===//
 
@@ -19,6 +20,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/InitializePasses.h"
@@ -33,6 +35,8 @@
 
 STATISTIC(NumRemoved, "Number of instructions removed (unused)");
 STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
+STATISTIC(NumSExt2ZExt,
+          "Number of sign extension instructions converted to zero extension");
 
 /// If an instruction is trivialized (dead), then the chain of users of that
 /// instruction may need to be cleared of assumptions that can no longer be
@@ -109,6 +113,25 @@
       continue;
     }
 
+    // Convert SExt into ZExt if none of the extension bits is required: when
+    // every demanded bit lies within the source width, the sign-extended high
+    // bits are never observed, so zero extension is equivalent.
+    if (SExtInst *SE = dyn_cast<SExtInst>(&I)) {
+      APInt Demanded = DB.getDemandedBits(SE);
+      const uint32_t SrcBitSize = SE->getSrcTy()->getScalarSizeInBits();
+      auto *const DstTy = SE->getDestTy();
+      const uint32_t DestBitSize = DstTy->getScalarSizeInBits();
+      if (Demanded.countLeadingZeros() >= (DestBitSize - SrcBitSize)) {
+        clearAssumptionsOfUsers(SE, DB);
+        IRBuilder<> Builder(SE);
+        I.replaceAllUsesWith(Builder.CreateZExt(SE->getOperand(0), DstTy));
+        Worklist.push_back(SE);
+        Changed = true;
+        NumSExt2ZExt++;
+        continue;
+      }
+    }
+
     for (Use &U : I.operands()) {
       // DemandedBits only detects dead integer uses.
       if (!U->getType()->isIntOrIntVectorTy())
diff --git a/llvm/test/Transforms/BDCE/sext_multi_uses.ll b/llvm/test/Transforms/BDCE/sext_multi_uses.ll
--- a/llvm/test/Transforms/BDCE/sext_multi_uses.ll
+++ b/llvm/test/Transforms/BDCE/sext_multi_uses.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -o - -bdce -S %s | FileCheck %s
+; RUN: opt -S -bdce < %s | FileCheck %s
 define i32 @ZEXT_0(i16 %a) {
 ; CHECK-LABEL: @ZEXT_0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[A:%.*]] to i32
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[EXT]], 65280
-; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[EXT]], 8
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[TMP0]], 65280
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[TMP0]], 8
 ; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
 ; CHECK-NEXT:    ret i32 [[OR]]
@@ -22,10 +22,10 @@
 define i32 @ZEXT_1(i16 %a) {
 ; CHECK-LABEL: @ZEXT_1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[A:%.*]] to i32
-; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[EXT]], 8
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[TMP0]], 8
 ; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
-; CHECK-NEXT:    [[AND:%.*]] = or i32 [[EXT]], -65536
+; CHECK-NEXT:    [[AND:%.*]] = or i32 [[TMP0]], -65536
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
@@ -99,8 +99,8 @@
 
 define i16 @clear_assumptions(i8 %x, i16 %y) {
 ; CHECK-LABEL: @clear_assumptions(
-; CHECK-NEXT:    [[EXT:%.*]] = sext i8 [[X:%.*]] to i16
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i16 [[EXT]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[X:%.*]] to i16
+; CHECK-NEXT:    [[ADD:%.*]] = add i16 [[TMP1]], [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = and i16 [[ADD]], 255
 ; CHECK-NEXT:    ret i16 [[AND]]
 ;