diff --git a/llvm/lib/Transforms/Scalar/BDCE.cpp b/llvm/lib/Transforms/Scalar/BDCE.cpp
--- a/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -9,7 +9,8 @@
 // This file implements the Bit-Tracking Dead Code Elimination pass. Some
 // instructions (shifts, some ands, ors, etc.) kill some of their input bits.
 // We track these dead bits and remove instructions that compute only these
-// dead bits.
+// dead bits. We also simplify sext that generates unused extension bits,
+// converting it to a zext.
 //
 //===----------------------------------------------------------------------===//
 
@@ -19,6 +20,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/InitializePasses.h"
@@ -33,6 +35,8 @@
 
 STATISTIC(NumRemoved, "Number of instructions removed (unused)");
 STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
+STATISTIC(NumSExt2ZExt,
+          "Number of sign extension instructions converted to zero extension");
 
 /// If an instruction is trivialized (dead), then the chain of users of that
 /// instruction may need to be cleared of assumptions that can no longer be
@@ -109,6 +113,28 @@
       continue;
     }
 
+    // Convert SExt into ZExt if none of the extension bits is required.
+    if (SExtInst *SE = dyn_cast<SExtInst>(&I)) {
+      APInt Demanded = DB.getDemandedBits(SE);
+      const uint32_t SrcBitSize = SE->getSrcTy()->getScalarSizeInBits();
+      auto *const DstTy = SE->getDestTy();
+      const uint32_t DestBitSize = DstTy->getScalarSizeInBits();
+      // The top (DestBitSize - SrcBitSize) bits are the only ones where sext
+      // and zext can differ; if no user demands them the two are equivalent.
+      if (Demanded.countLeadingZeros() >= (DestBitSize - SrcBitSize)) {
+        // The undemanded high bits change value, so poison-generating flags
+        // (e.g. nsw/nuw) on transitive users may no longer hold.
+        clearAssumptionsOfUsers(SE, DB);
+        IRBuilder<> Builder(&I);
+        I.replaceAllUsesWith(Builder.CreateZExt(SE->getOperand(0), DstTy));
+        // After the replacement the sext itself has no remaining uses.
+        Worklist.push_back(&I);
+        Changed = true;
+        NumSExt2ZExt++;
+        continue;
+      }
+    }
+
     for (Use &U : I.operands()) {
       // DemandedBits only detects dead integer uses.
       if (!U->getType()->isIntOrIntVectorTy())
diff --git a/llvm/test/Transforms/BDCE/sext_multi_uses.ll b/llvm/test/Transforms/BDCE/sext_multi_uses.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/BDCE/sext_multi_uses.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -bdce < %s | FileCheck %s
+define i32 @ZEXT_0(i16 %a) {
+; CHECK-LABEL: @ZEXT_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[TMP0]], 65280
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[TMP0]], 8
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %and = and i32 %ext, 65280
+  %lsr = lshr i32 %ext, 8
+  %and2 = and i32 %lsr, 255
+  %or = or i32 %and, %and2
+  ret i32 %or
+}
+
+define i32 @ZEXT_1(i16 %a) {
+; CHECK-LABEL: @ZEXT_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[TMP0]], 8
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[AND:%.*]] = or i32 [[TMP0]], -65536
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %lsr = lshr i32 %ext, 8
+  %and2 = and i32 %lsr, 255
+  %and = or i32 %ext, 4294901760
+  %or = or i32 %and, %and2
+  ret i32 %or
+}
+
+define i16 @NOT_ZEXT_0(i16 %a) {
+; CHECK-LABEL: @NOT_ZEXT_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[EXT]], 65280
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[EXT]], 9
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[OR]] to i16
+; CHECK-NEXT:    ret i16 [[RET]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %and = and i32 %ext, 65280
+  %lsr = lshr i32 %ext, 9
+  %and2 = and i32 %lsr, 255
+  %or = or i32 %and, %and2
+  %ret = trunc i32 %or to i16
+  ret i16 %ret
+}
+
+define i32 @NOT_ZEXT_1(i16 %a) {
+; CHECK-LABEL: @NOT_ZEXT_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[EXT]], 85280
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[EXT]], 8
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %and = and i32 %ext, 85280
+  %lsr = lshr i32 %ext, 8
+  %and2 = and i32 %lsr, 255
+  %or = or i32 %and, %and2
+  ret i32 %or
+}
+
+define i32 @NOT_ZEXT_2(i16 %a) {
+; CHECK-LABEL: @NOT_ZEXT_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[LSR:%.*]] = lshr i32 [[EXT]], 8
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[LSR]], 255
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[EXT]], -65536
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[AND]], [[AND2]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+entry:
+  %ext = sext i16 %a to i32
+  %lsr = lshr i32 %ext, 8
+  %and2 = and i32 %lsr, 255
+  %and = xor i32 %ext, 4294901760
+  %or = or i32 %and, %and2
+  ret i32 %or
+}