Index: include/llvm-c/Transforms/Scalar.h =================================================================== --- include/llvm-c/Transforms/Scalar.h +++ include/llvm-c/Transforms/Scalar.h @@ -35,6 +35,9 @@ /** See llvm::createAggressiveDCEPass function. */ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM); +/** See llvm::createBitTrackingDCEPass function. */ +void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM); + /** See llvm::createAlignmentFromAssumptionsPass function. */ void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM); Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -65,6 +65,7 @@ void initializeAAEvalPass(PassRegistry&); void initializeAddDiscriminatorsPass(PassRegistry&); void initializeADCEPass(PassRegistry&); +void initializeBDCEPass(PassRegistry&); void initializeAliasAnalysisAnalysisGroup(PassRegistry&); void initializeAliasAnalysisCounterPass(PassRegistry&); void initializeAliasDebuggerPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -49,6 +49,7 @@ (void) llvm::createAAEvalPass(); (void) llvm::createAggressiveDCEPass(); + (void) llvm::createBitTrackingDCEPass(); (void) llvm::createAliasAnalysisCounterPass(); (void) llvm::createAliasDebugger(); (void) llvm::createArgumentPromotionPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -82,6 +82,13 @@ //===----------------------------------------------------------------------===// // +// BitTrackingDCE - This pass uses a bit-tracking DCE algorithm in order to +// remove computations of dead bits. +// +FunctionPass *createBitTrackingDCEPass(); + +//===----------------------------------------------------------------------===// +// // SROA - Replace aggregates or pieces of aggregates with scalar SSA values. // FunctionPass *createSROAPass(bool RequiresDomTree = true); Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -250,6 +250,11 @@ MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset MPM.add(createSCCPPass()); // Constant prop with SCCP + // Delete dead bit computations (instcombine runs after to fold away the dead + // computations, and then ADCE will run later to exploit any new DCE + // opportunities that creates). + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. MPM.add(createInstructionCombiningPass()); Index: lib/Transforms/Scalar/BDCE.cpp =================================================================== --- /dev/null +++ lib/Transforms/Scalar/BDCE.cpp @@ -0,0 +1,381 @@ +//===---- BDCE.cpp - Bit-tracking dead code elimination -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Bit-Tracking Dead Code Elimination pass. Some +// instructions (shifts, some ands, ors, etc.) kill some of their input bits. +// We track these dead bits and remove instructions that compute only these +// dead bits. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "bdce" + +STATISTIC(NumRemoved, "Number of instructions removed (unused)"); +STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)"); + +namespace { + struct BDCE : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + BDCE() : FunctionPass(ID) { + initializeBDCEPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function& F) override; + + void getAnalysisUsage(AnalysisUsage& AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + } + + AssumptionCache *AC; + const DataLayout *DL; + DominatorTree *DT; + }; +} + +char BDCE::ID = 0; +INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination", + false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(BDCE, "bdce", "Bit-Tracking Dead Code Elimination", + false, false) + +static bool isAlwaysLive(Instruction *I) { + return isa(I) || isa(I) || + isa(I) || I->mayHaveSideEffects(); +} + +bool BDCE::runOnFunction(Function& F) { + if (skipOptnoneFunction(F)) + return false; + + AC = &getAnalysis().getAssumptionCache(F); + DataLayoutPass *DLP = getAnalysisIfAvailable(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + DT = &getAnalysis().getDomTree(); + + DenseMap AliveBits; + SmallVector Worklist; + + // The set of visited instructions (non-integer-typed only). + SmallPtrSet Visited; + + // Collect the set of "root" instructions that are known live. + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (!isAlwaysLive(I.getInstructionIterator())) + continue; + + // For integer-valued instructions, set up an initial empty set of alive + // bits and add the instruction to the work list. For other instructions + // add their operands to the work list (for integer values operands, mark + // all bits as live). + if (IntegerType *IT = dyn_cast(I->getType())) { + AliveBits[I.getInstructionIterator()] = APInt(IT->getBitWidth(), 0); + Worklist.push_back(I.getInstructionIterator()); + } else { + for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + if (Instruction* J = dyn_cast(OI)) { + if (IntegerType *IT = dyn_cast(J->getType())) + AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth()); + Worklist.push_back(J); + } + } + } + + // Propagate liveness backwards to operands. + while (!Worklist.empty()) { + Instruction* UserI = Worklist.pop_back_val(); + + DEBUG(dbgs() << "BDCE: Visiting: " << *UserI); + APInt AOut; + if (UserI->getType()->isIntegerTy()) { + AOut = AliveBits[UserI]; + DEBUG(dbgs() << " Alive Out: " << AOut); + } + DEBUG(dbgs() << "\n"); + + APInt KnownZero, KnownOne, KnownZero2, KnownOne2; + auto ComputeKnownBits = [&](unsigned BitWidth, Value *V1, Value *V2) { + KnownZero = APInt(BitWidth, 0); + KnownOne = APInt(BitWidth, 0); + computeKnownBits(V1, KnownZero, KnownOne, DL, 0, AC, UserI, DT); + + if (V2) { + KnownZero2 = APInt(BitWidth, 0); + KnownOne2 = APInt(BitWidth, 0); + computeKnownBits(V2, KnownZero2, KnownOne2, DL, 0, AC, + UserI, DT); + } + }; + + // Compute the set of alive bits for each operand. These are anded into the + // existing set, if any, and if that changes the set of alive bits, the + // operand is added to the work-list. + for (Instruction::op_iterator OI = UserI->op_begin(), OE = UserI->op_end(); + OI != OE; ++OI) { + if (Instruction* I = dyn_cast(OI)) { + if (IntegerType *IT = dyn_cast(I->getType())) { + unsigned BitWidth = IT->getBitWidth(); + APInt AB = APInt::getAllOnesValue(BitWidth); + if (UserI->getType()->isIntegerTy() && !AOut && + !isAlwaysLive(UserI)) { + AB = APInt(BitWidth, 0); + } else { + // If all bits of the output are dead, then all bits of the input + // Bits of each operand that are used to compute alive bits of the + // output are alive, all others are dead. + switch (UserI->getOpcode()) { + default: break; + case Instruction::Call: + case Instruction::Invoke: + if (IntrinsicInst *II = dyn_cast(UserI)) + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::bswap: + // The alive bits of the input are the swapped alive bits of + // the output. + AB = AOut.byteSwap(); + break; + case Intrinsic::ctlz: + if (OI->getOperandNo() == 0) { + // We need some output bits, so we need all bits of the + // input to the left of, and including, the leftmost bit + // known to be one. + ComputeKnownBits(BitWidth, I, nullptr); + AB = APInt::getHighBitsSet(BitWidth, + std::min(BitWidth, KnownOne.countLeadingZeros()+1)); + } + break; + case Intrinsic::cttz: + if (OI->getOperandNo() == 0) { + // We need some output bits, so we need all bits of the + // input to the right of, and including, the rightmost bit + // known to be one. + ComputeKnownBits(BitWidth, I, nullptr); + AB = APInt::getLowBitsSet(BitWidth, + std::min(BitWidth, KnownOne.countTrailingZeros()+1)); + } + break; + } + break; + case Instruction::Add: + case Instruction::Sub: + // Find the highest live output bit. We don't need any more input + // bits than that (adds, and thus subtracts, ripple only to the + // left). + AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits()); + break; + case Instruction::Shl: + if (OI->getOperandNo() == 0) + if (ConstantInt *CI = + dyn_cast(UserI->getOperand(1))) { + uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + AB = AOut.lshr(ShiftAmt); + + // If the shift is nuw/nsw, then the high bits are not dead + // (because we've promised that they *must* be zero). + ShlOperator *S = cast(UserI); + if (S->hasNoSignedWrap()) + AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1); + else if (S->hasNoUnsignedWrap()) + AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt); + } + break; + case Instruction::LShr: + if (OI->getOperandNo() == 0) + if (ConstantInt *CI = + dyn_cast(UserI->getOperand(1))) { + uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + AB = AOut.shl(ShiftAmt); + + // If the shift is exact, then the low bits are not dead + // (they must be zero). + if (cast(UserI)->isExact()) + AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + } + break; + case Instruction::AShr: + if (OI->getOperandNo() == 0) + if (ConstantInt *CI = + dyn_cast(UserI->getOperand(1))) { + uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + AB = AOut.shl(ShiftAmt); + // Because the high input bit is replicated into the + // high-order bits of the result, if we need any of those + // bits, then we must keep the highest input bit. + if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt)) + .getBoolValue()) + AB.setBit(BitWidth-1); + + // If the shift is exact, then the low bits are not dead + // (they must be zero). + if (cast(UserI)->isExact()) + AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + } + break; + case Instruction::And: + AB = AOut; + + // For bits that are known zero, the corresponding bits in the + // other operand are dead (unless they're both zero, in which + // case they can't both be dead, so just mark the LHS bits as + // dead). + if (OI->getOperandNo() == 0) { + ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); + AB &= ~KnownZero2; + } else { + if (!isa(UserI->getOperand(0))) + ComputeKnownBits(BitWidth, UserI->getOperand(0), I); + AB &= ~(KnownZero & ~KnownZero2); + } + break; + case Instruction::Or: + AB = AOut; + + // For bits that are known one, the corresponding bits in the + // other operand are dead (unless they're both one, in which + // case they can't both be dead, so just mark the LHS bits as + // dead). + if (OI->getOperandNo() == 0) { + ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); + AB &= ~KnownOne2; + } else { + if (!isa(UserI->getOperand(0))) + ComputeKnownBits(BitWidth, UserI->getOperand(0), I); + AB &= ~(KnownOne & ~KnownOne2); + } + break; + case Instruction::Xor: + case Instruction::PHI: + AB = AOut; + break; + case Instruction::Trunc: + AB = AOut.zext(BitWidth); + break; + case Instruction::ZExt: + AB = AOut.trunc(BitWidth); + break; + case Instruction::SExt: + AB = AOut.trunc(BitWidth); + // Because the high input bit is replicated into the + // high-order bits of the result, if we need any of those + // bits, then we must keep the highest input bit. + if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(), + AOut.getBitWidth() - BitWidth)) + .getBoolValue()) + AB.setBit(BitWidth-1); + break; + case Instruction::Select: + if (OI->getOperandNo() != 0) + AB = AOut; + break; + } + } + + // If we've added to the set of alive bits (or the operand has not + // been previously visited), then re-queue the operand to be visited + // again. + APInt ABPrev(BitWidth, 0); + auto ABI = AliveBits.find(I); + if (ABI != AliveBits.end()) + ABPrev = ABI->second; + + APInt ABNew = AB | ABPrev; + if (ABNew != ABPrev || ABI == AliveBits.end()) { + AliveBits[I] = ABNew; + Worklist.push_back(I); + } + } else if (!Visited.count(I)) { + Worklist.push_back(I); + } + } + } + + if (!UserI->getType()->isIntegerTy()) + Visited.insert(UserI); + } + + // The inverse of the live set is the dead set. These are those instructions + // which have no side effects and do not influence the control flow or return + // value of the function, and may therefore be deleted safely. + // NOTE: We reuse the Worklist vector here for memory efficiency. + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (isAlwaysLive(I.getInstructionIterator())) + continue; + + if (!Visited.count(I.getInstructionIterator()) && + (!I->getType()->isIntegerTy() || + !AliveBits.count(I.getInstructionIterator()))) { + DEBUG(dbgs() << "BDCE: Removing: " << *I.getInstructionIterator() << + " (unused)\n"); + Worklist.push_back(I.getInstructionIterator()); + I->dropAllReferences(); + } + } + + for (SmallVectorImpl::iterator I = Worklist.begin(), + E = Worklist.end(); I != E; ++I) { + ++NumRemoved; + (*I)->eraseFromParent(); + } + + bool Changed = !Worklist.empty(); + Worklist.clear(); + + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (!I->getType()->isIntegerTy()) + continue; + + if (!AliveBits[I.getInstructionIterator()]) { + DEBUG(dbgs() << "BDCE: Trivializing: " << *I.getInstructionIterator() << + " (all bits dead)\n"); + Worklist.push_back(I.getInstructionIterator()); + } + } + + for (SmallVectorImpl::iterator I = Worklist.begin(), + E = Worklist.end(); I != E; ++I) { + Value *Zero = ConstantInt::get((*I)->getType(), 0); + ++NumSimplified; + (*I)->replaceAllUsesWith(Zero); + } + + Changed |= !Worklist.empty(); + return Changed; +} + +FunctionPass *llvm::createBitTrackingDCEPass() { + return new BDCE(); +} + Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMScalarOpts ADCE.cpp AlignmentFromAssumptions.cpp + BDCE.cpp ConstantHoisting.cpp ConstantProp.cpp CorrelatedValuePropagation.cpp Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -28,6 +28,7 @@ /// ScalarOpts library. void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeADCEPass(Registry); + initializeBDCEPass(Registry); initializeAlignmentFromAssumptionsPass(Registry); initializeSampleProfileLoaderPass(Registry); initializeConstantHoistingPass(Registry); @@ -83,6 +84,10 @@ unwrap(PM)->add(createAggressiveDCEPass()); } +void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createBitTrackingDCEPass()); +} + void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createAlignmentFromAssumptionsPass()); } Index: test/Transforms/BDCE/basic.ll =================================================================== --- /dev/null +++ test/Transforms/BDCE/basic.ll @@ -0,0 +1,348 @@ +; RUN: opt -S -bdce -instsimplify < %s | FileCheck %s +; RUN: opt -S -instsimplify < %s | FileCheck %s -check-prefix=CHECK-IO +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Function Attrs: nounwind readnone +define signext i32 @bar(i32 signext %x) #0 { +entry: + %call = tail call signext i32 @foo(i32 signext 5) #0 + %and = and i32 %call, 4 + %or = or i32 %and, %x + %call1 = tail call signext i32 @foo(i32 signext 3) #0 + %and2 = and i32 %call1, 8 + %or3 = or i32 %or, %and2 + %call4 = tail call signext i32 @foo(i32 signext 2) #0 + %and5 = and i32 %call4, 16 + %or6 = or i32 %or3, %and5 + %call7 = tail call signext i32 @foo(i32 signext 1) #0 + %and8 = and i32 %call7, 32 + %or9 = or i32 %or6, %and8 + %call10 = tail call signext i32 @foo(i32 signext 0) #0 + %and11 = and i32 %call10, 64 + %or12 = or i32 %or9, %and11 + %call13 = tail call signext i32 @foo(i32 signext 4) #0 + %and14 = and i32 %call13, 128 + %or15 = or i32 %or12, %and14 + %shr = ashr i32 %or15, 4 + ret i32 %shr + +; CHECK-LABEL: @bar +; CHECK-NOT: tail call signext i32 @foo(i32 signext 5) +; CHECK-NOT: tail call signext i32 @foo(i32 signext 3) +; CHECK: tail call signext i32 @foo(i32 signext 2) +; CHECK: tail call signext i32 @foo(i32 signext 1) +; CHECK: tail call signext i32 @foo(i32 signext 0) +; CHECK: tail call signext i32 @foo(i32 signext 4) +; CHECK: ret i32 + +; Check that instsimplify is not doing this all on its own. +; CHECK-IO-LABEL: @bar +; CHECK-IO: tail call signext i32 @foo(i32 signext 5) +; CHECK-IO: tail call signext i32 @foo(i32 signext 3) +; CHECK-IO: tail call signext i32 @foo(i32 signext 2) +; CHECK-IO: tail call signext i32 @foo(i32 signext 1) +; CHECK-IO: tail call signext i32 @foo(i32 signext 0) +; CHECK-IO: tail call signext i32 @foo(i32 signext 4) +; CHECK-IO: ret i32 +} + +; Function Attrs: nounwind readnone +declare signext i32 @foo(i32 signext) #0 + +; Function Attrs: nounwind readnone +define signext i32 @far(i32 signext %x) #1 { +entry: + %call = tail call signext i32 @goo(i32 signext 5) #1 + %and = and i32 %call, 4 + %or = or i32 %and, %x + %call1 = tail call signext i32 @goo(i32 signext 3) #1 + %and2 = and i32 %call1, 8 + %or3 = or i32 %or, %and2 + %call4 = tail call signext i32 @goo(i32 signext 2) #1 + %and5 = and i32 %call4, 16 + %or6 = or i32 %or3, %and5 + %call7 = tail call signext i32 @goo(i32 signext 1) #1 + %and8 = and i32 %call7, 32 + %or9 = or i32 %or6, %and8 + %call10 = tail call signext i32 @goo(i32 signext 0) #1 + %and11 = and i32 %call10, 64 + %or12 = or i32 %or9, %and11 + %call13 = tail call signext i32 @goo(i32 signext 4) #1 + %and14 = and i32 %call13, 128 + %or15 = or i32 %or12, %and14 + %shr = ashr i32 %or15, 4 + ret i32 %shr + +; CHECK-LABEL: @far +; Calls to foo(5) and foo(3) are still there, but their results are not used. +; CHECK: tail call signext i32 @goo(i32 signext 5) +; CHECK-NEXT: tail call signext i32 @goo(i32 signext 3) +; CHECK-NEXT: tail call signext i32 @goo(i32 signext 2) +; CHECK: tail call signext i32 @goo(i32 signext 1) +; CHECK: tail call signext i32 @goo(i32 signext 0) +; CHECK: tail call signext i32 @goo(i32 signext 4) +; CHECK: ret i32 + +; Check that instsimplify is not doing this all on its own. +; CHECK-IO-LABEL: @far +; CHECK-IO: tail call signext i32 @goo(i32 signext 5) +; CHECK-IO: tail call signext i32 @goo(i32 signext 3) +; CHECK-IO: tail call signext i32 @goo(i32 signext 2) +; CHECK-IO: tail call signext i32 @goo(i32 signext 1) +; CHECK-IO: tail call signext i32 @goo(i32 signext 0) +; CHECK-IO: tail call signext i32 @goo(i32 signext 4) +; CHECK-IO: ret i32 +} + +declare signext i32 @goo(i32 signext) #1 + +; Function Attrs: nounwind readnone +define signext i32 @tar1(i32 signext %x) #0 { +entry: + %call = tail call signext i32 @foo(i32 signext 5) #0 + %and = and i32 %call, 33554432 + %or = or i32 %and, %x + %call1 = tail call signext i32 @foo(i32 signext 3) #0 + %and2 = and i32 %call1, 67108864 + %or3 = or i32 %or, %and2 + %call4 = tail call signext i32 @foo(i32 signext 2) #0 + %and5 = and i32 %call4, 16 + %or6 = or i32 %or3, %and5 + %call7 = tail call signext i32 @foo(i32 signext 1) #0 + %and8 = and i32 %call7, 32 + %or9 = or i32 %or6, %and8 + %call10 = tail call signext i32 @foo(i32 signext 0) #0 + %and11 = and i32 %call10, 64 + %or12 = or i32 %or9, %and11 + %call13 = tail call signext i32 @foo(i32 signext 4) #0 + %and14 = and i32 %call13, 128 + %or15 = or i32 %or12, %and14 + %bs = tail call i32 @llvm.bswap.i32(i32 %or15) #0 + %shr = ashr i32 %bs, 4 + ret i32 %shr + +; CHECK-LABEL: @tar1 +; CHECK-NOT: tail call signext i32 @foo(i32 signext 5) +; CHECK-NOT: tail call signext i32 @foo(i32 signext 3) +; CHECK: tail call signext i32 @foo(i32 signext 2) +; CHECK: tail call signext i32 @foo(i32 signext 1) +; CHECK: tail call signext i32 @foo(i32 signext 0) +; CHECK: tail call signext i32 @foo(i32 signext 4) +; CHECK: ret i32 +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.bswap.i32(i32) #0 + +; Function Attrs: nounwind readnone +define signext i32 @tar2(i32 signext %x) #0 { +entry: + %call = tail call signext i32 @foo(i32 signext 5) #0 + %and = and i32 %call, 33554432 + %or = or i32 %and, %x + %call1 = tail call signext i32 @foo(i32 signext 3) #0 + %and2 = and i32 %call1, 67108864 + %or3 = or i32 %or, %and2 + %call4 = tail call signext i32 @foo(i32 signext 2) #0 + %and5 = and i32 %call4, 16 + %or6 = or i32 %or3, %and5 + %call7 = tail call signext i32 @foo(i32 signext 1) #0 + %and8 = and i32 %call7, 32 + %or9 = or i32 %or6, %and8 + %call10 = tail call signext i32 @foo(i32 signext 0) #0 + %and11 = and i32 %call10, 64 + %or12 = or i32 %or9, %and11 + %call13 = tail call signext i32 @foo(i32 signext 4) #0 + %and14 = and i32 %call13, 128 + %or15 = or i32 %or12, %and14 + %shl = shl i32 %or15, 10 + ret i32 %shl + +; CHECK-LABEL: @tar2 +; CHECK-NOT: tail call signext i32 @foo(i32 signext 5) +; CHECK-NOT: tail call signext i32 @foo(i32 signext 3) +; CHECK: tail call signext i32 @foo(i32 signext 2) +; CHECK: tail call signext i32 @foo(i32 signext 1) +; CHECK: tail call signext i32 @foo(i32 signext 0) +; CHECK: tail call signext i32 @foo(i32 signext 4) +; CHECK: ret i32 +} + +; Function Attrs: nounwind readnone +define signext i32 @tar3(i32 signext %x) #0 { +entry: + %call = tail call signext i32 @foo(i32 signext 5) #0 + %and = and i32 %call, 33554432 + %or = or i32 %and, %x + %call1 = tail call signext i32 @foo(i32 signext 3) #0 + %and2 = and i32 %call1, 67108864 + %or3 = or i32 %or, %and2 + %call4 = tail call signext i32 @foo(i32 signext 2) #0 + %and5 = and i32 %call4, 16 + %or6 = or i32 %or3, %and5 + %call7 = tail call signext i32 @foo(i32 signext 1) #0 + %and8 = and i32 %call7, 32 + %or9 = or i32 %or6, %and8 + %call10 = tail call signext i32 @foo(i32 signext 0) #0 + %and11 = and i32 %call10, 64 + %or12 = or i32 %or9, %and11 + %call13 = tail call signext i32 @foo(i32 signext 4) #0 + %and14 = and i32 %call13, 128 + %or15 = or i32 %or12, %and14 + %add = add i32 %or15, 5 + %shl = shl i32 %add, 10 + ret i32 %shl + +; CHECK-LABEL: @tar3 +; CHECK-NOT: tail call signext i32 @foo(i32 signext 5) +; CHECK-NOT: tail call signext i32 @foo(i32 signext 3) +; CHECK: tail call signext i32 @foo(i32 signext 2) +; CHECK: tail call signext i32 @foo(i32 signext 1) +; CHECK: tail call signext i32 @foo(i32 signext 0) +; CHECK: tail call signext i32 @foo(i32 signext 4) +; CHECK: ret i32 +} + +; Function Attrs: nounwind readnone +define signext i32 @tar4(i32 signext %x) #0 { +entry: + %call = tail call signext i32 @foo(i32 signext 5) #0 + %and = and i32 %call, 33554432 + %or = or i32 %and, %x + %call1 = tail call signext i32 @foo(i32 signext 3) #0 + %and2 = and i32 %call1, 67108864 + %or3 = or i32 %or, %and2 + %call4 = tail call signext i32 @foo(i32 signext 2) #0 + %and5 = and i32 %call4, 16 + %or6 = or i32 %or3, %and5 + %call7 = tail call signext i32 @foo(i32 signext 1) #0 + %and8 = and i32 %call7, 32 + %or9 = or i32 %or6, %and8 + %call10 = tail call signext i32 @foo(i32 signext 0) #0 + %and11 = and i32 %call10, 64 + %or12 = or i32 %or9, %and11 + %call13 = tail call signext i32 @foo(i32 signext 4) #0 + %and14 = and i32 %call13, 128 + %or15 = or i32 %or12, %and14 + %sub = sub i32 %or15, 5 + %shl = shl i32 %sub, 10 + ret i32 %shl + +; CHECK-LABEL: @tar4 +; CHECK-NOT: tail call signext i32 @foo(i32 signext 5) +; CHECK-NOT: tail call signext i32 @foo(i32 signext 3) +; CHECK: tail call signext i32 @foo(i32 signext 2) +; CHECK: tail call signext i32 @foo(i32 signext 1) +; CHECK: tail call signext i32 @foo(i32 signext 0) +; CHECK: tail call signext i32 @foo(i32 signext 4) +; CHECK: ret i32 +} + +; Function Attrs: nounwind readnone +define signext i32 @tar5(i32 signext %x) #0 { +entry: + %call = tail call signext i32 @foo(i32 signext 5) #0 + %and = and i32 %call, 33554432 + %or = or i32 %and, %x + %call1 = tail call signext i32 @foo(i32 signext 3) #0 + %and2 = and i32 %call1, 67108864 + %or3 = or i32 %or, %and2 + %call4 = tail call signext i32 @foo(i32 signext 2) #0 + %and5 = and i32 %call4, 16 + %or6 = or i32 %or3, %and5 + %call7 = tail call signext i32 @foo(i32 signext 1) #0 + %and8 = and i32 %call7, 32 + %or9 = or i32 %or6, %and8 + %call10 = tail call signext i32 @foo(i32 signext 0) #0 + %and11 = and i32 %call10, 64 + %or12 = or i32 %or9, %and11 + %call13 = tail call signext i32 @foo(i32 signext 4) #0 + %and14 = and i32 %call13, 128 + %or15 = or i32 %or12, %and14 + %xor = xor i32 %or15, 5 + %shl = shl i32 %xor, 10 + ret i32 %shl + +; CHECK-LABEL: @tar5 +; CHECK-NOT: tail call signext i32 @foo(i32 signext 5) +; CHECK-NOT: tail call signext i32 @foo(i32 signext 3) +; CHECK: tail call signext i32 @foo(i32 signext 2) +; CHECK: tail call signext i32 @foo(i32 signext 1) +; CHECK: tail call signext i32 @foo(i32 signext 0) +; CHECK: tail call signext i32 @foo(i32 signext 4) +; CHECK: ret i32 +} + +; Function Attrs: nounwind readnone +define signext i32 @tar7(i32 signext %x, i1 %b) #0 { +entry: + %call = tail call signext i32 @foo(i32 signext 5) #0 + %and = and i32 %call, 33554432 + %or = or i32 %and, %x + %call1 = tail call signext i32 @foo(i32 signext 3) #0 + %and2 = and i32 %call1, 67108864 + %or3 = or i32 %or, %and2 + %call4 = tail call signext i32 @foo(i32 signext 2) #0 + %and5 = and i32 %call4, 16 + %or6 = or i32 %or3, %and5 + %call7 = tail call signext i32 @foo(i32 signext 1) #0 + %and8 = and i32 %call7, 32 + %or9 = or i32 %or6, %and8 + %call10 = tail call signext i32 @foo(i32 signext 0) #0 + %and11 = and i32 %call10, 64 + %or12 = or i32 %or9, %and11 + %call13 = tail call signext i32 @foo(i32 signext 4) #0 + %and14 = and i32 %call13, 128 + %or15 = or i32 %or12, %and14 + %v = select i1 %b, i32 %or15, i32 5 + %shl = shl i32 %v, 10 + ret i32 %shl + +; CHECK-LABEL: @tar7 +; CHECK-NOT: tail call signext i32 @foo(i32 signext 5) +; CHECK-NOT: tail call signext i32 @foo(i32 signext 3) +; CHECK: tail call signext i32 @foo(i32 signext 2) +; CHECK: tail call signext i32 @foo(i32 signext 1) +; CHECK: tail call signext i32 @foo(i32 signext 0) +; CHECK: tail call signext i32 @foo(i32 signext 4) +; CHECK: ret i32 +} + +; Function Attrs: nounwind readnone +define signext i16 @tar8(i32 signext %x) #0 { +entry: + %call = tail call signext i32 @foo(i32 signext 5) #0 + %and = and i32 %call, 33554432 + %or = or i32 %and, %x + %call1 = tail call signext i32 @foo(i32 signext 3) #0 + %and2 = and i32 %call1, 67108864 + %or3 = or i32 %or, %and2 + %call4 = tail call signext i32 @foo(i32 signext 2) #0 + %and5 = and i32 %call4, 16 + %or6 = or i32 %or3, %and5 + %call7 = tail call signext i32 @foo(i32 signext 1) #0 + %and8 = and i32 %call7, 32 + %or9 = or i32 %or6, %and8 + %call10 = tail call signext i32 @foo(i32 signext 0) #0 + %and11 = and i32 %call10, 64 + %or12 = or i32 %or9, %and11 + %call13 = tail call signext i32 @foo(i32 signext 4) #0 + %and14 = and i32 %call13, 128 + %or15 = or i32 %or12, %and14 + %tr = trunc i32 %or15 to i16 + ret i16 %tr + +; CHECK-LABEL: @tar8 +; CHECK-NOT: tail call signext i32 @foo(i32 signext 5) +; CHECK-NOT: tail call signext i32 @foo(i32 signext 3) +; CHECK: tail call signext i32 @foo(i32 signext 2) +; CHECK: tail call signext i32 @foo(i32 signext 1) +; CHECK: tail call signext i32 @foo(i32 signext 0) +; CHECK: tail call signext i32 @foo(i32 signext 4) +; CHECK: ret i16 +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + Index: test/Transforms/BDCE/dce-pure.ll =================================================================== --- /dev/null +++ test/Transforms/BDCE/dce-pure.ll @@ -0,0 +1,33 @@ +; RUN: opt -bdce -S < %s | FileCheck %s + +declare i32 @strlen(i8*) readonly nounwind + +define void @test1() { + call i32 @strlen( i8* null ) ; :1 [#uses=0] + ret void + +; CHECK-LABEL: @test1 +; CHECK-NOT: call +; CHECK: ret void +} + +define i32 @test2() { + ; invoke of pure function should not be deleted! + invoke i32 @strlen( i8* null ) readnone + to label %Cont unwind label %Other ; :1 [#uses=0] + +Cont: ; preds = %0 + ret i32 0 + +Other: ; preds = %0 + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + ret i32 1 + +; CHECK-LABEL: @test2 +; CHECK: invoke +; CHECK: ret i32 1 +} + +declare i32 @__gxx_personality_v0(...) +