Index: include/llvm/Analysis/DemandedBits.h =================================================================== --- include/llvm/Analysis/DemandedBits.h +++ include/llvm/Analysis/DemandedBits.h @@ -57,6 +57,9 @@ /// Return true if, during analysis, I could not be reached. bool isInstructionDead(Instruction *I); + /// Return whether this use is dead by means of not having any demanded bits. + bool isUseDead(Use *U); + void print(raw_ostream &OS); private: @@ -75,6 +78,9 @@ // The set of visited instructions (non-integer-typed only). SmallPtrSet Visited; DenseMap AliveBits; + // Uses with no demanded bits. If the user also has no demanded bits, the use + // might not be stored explicitly in this map, to save memory during analysis. + SmallPtrSet DeadUses; }; class DemandedBitsWrapperPass : public FunctionPass { Index: lib/Analysis/DemandedBits.cpp =================================================================== --- lib/Analysis/DemandedBits.cpp +++ lib/Analysis/DemandedBits.cpp @@ -314,6 +314,7 @@ Visited.clear(); AliveBits.clear(); + DeadUses.clear(); SmallVector Worklist; @@ -374,26 +375,35 @@ Type *T = I->getType(); if (T->isIntOrIntVectorTy()) { unsigned BitWidth = T->getScalarSizeInBits(); + + // Previous demanded bits information for this use. + APInt ABPrev(BitWidth, 0); + auto ABI = AliveBits.find(I); + if (ABI != AliveBits.end()) + ABPrev = ABI->second; + APInt AB = APInt::getAllOnesValue(BitWidth); if (UserI->getType()->isIntOrIntVectorTy() && !AOut && !isAlwaysLive(UserI)) { + // If all bits of the output are dead, then all bits of the input + // are also dead. AB = APInt(BitWidth, 0); } else { - // If all bits of the output are dead, then all bits of the input // Bits of each operand that are used to compute alive bits of the // output are alive, all others are dead. determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB, Known, Known2); + + // Keep track of uses which have no demanded bits. + if (AB.isNullValue()) + DeadUses.insert(&OI); + else if (ABPrev.isNullValue()) + DeadUses.erase(&OI); } // If we've added to the set of alive bits (or the operand has not // been previously visited), then re-queue the operand to be visited // again. - APInt ABPrev(BitWidth, 0); - auto ABI = AliveBits.find(I); - if (ABI != AliveBits.end()) - ABPrev = ABI->second; - APInt ABNew = AB | ABPrev; if (ABNew != ABPrev || ABI == AliveBits.end()) { AliveBits[I] = std::move(ABNew); @@ -426,6 +436,31 @@ !isAlwaysLive(I); } +bool DemandedBits::isUseDead(Use *U) { + // We only track integer uses, everything else is assumed live. + if (!(*U)->getType()->isIntOrIntVectorTy()) + return false; + + // Uses by always-live instructions are never dead. + Instruction *UserI = cast(U->getUser()); + if (isAlwaysLive(UserI)) + return false; + + performAnalysis(); + if (DeadUses.count(U)) + return true; + + // If no output bits are demanded, no input bits are demanded and the use + // is dead. These uses might not be explicitly present in the DeadUses map. + if (UserI->getType()->isIntOrIntVectorTy()) { + auto Found = AliveBits.find(UserI); + if (Found != AliveBits.end() && Found->second.isNullValue()) + return true; + } + + return false; +} + void DemandedBits::print(raw_ostream &OS) { performAnalysis(); for (auto &KV : AliveBits) { Index: lib/Transforms/Scalar/BDCE.cpp =================================================================== --- lib/Transforms/Scalar/BDCE.cpp +++ lib/Transforms/Scalar/BDCE.cpp @@ -96,30 +96,38 @@ if (I.mayHaveSideEffects() && I.use_empty()) continue; - if (I.getType()->isIntOrIntVectorTy() && - !DB.getDemandedBits(&I).getBoolValue()) { - // For live instructions that have all dead bits, first make them dead by - // replacing all uses with something else. Then, if they don't need to - // remain live (because they have side effects, etc.) we can remove them. - LLVM_DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n"); + // Remove instructions not reached during analysis. + if (DB.isInstructionDead(&I)) { + salvageDebugInfo(I); + Worklist.push_back(&I); + I.dropAllReferences(); + Changed = true; + continue; + } + + for (Use &U : I.operands()) { + // DemandedBits only detects dead integer uses. + if (!U->getType()->isIntOrIntVectorTy()) + continue; + + // TODO: We could also find dead non-instruction uses, e.g. arguments. + if (!isa(U)) + continue; + + if (!DB.isUseDead(&U)) + continue; + + LLVM_DEBUG(dbgs() << "BDCE: Trivializing: " << U << " (all bits dead)\n"); clearAssumptionsOfUsers(&I, DB); // FIXME: In theory we could substitute undef here instead of zero. // This should be reconsidered once we settle on the semantics of // undef, poison, etc. - Value *Zero = ConstantInt::get(I.getType(), 0); + U.set(ConstantInt::get(U->getType(), 0)); ++NumSimplified; - I.replaceNonMetadataUsesWith(Zero); Changed = true; } - if (!DB.isInstructionDead(&I)) - continue; - - salvageDebugInfo(I); - Worklist.push_back(&I); - I.dropAllReferences(); - Changed = true; } for (Instruction *&I : Worklist) { Index: test/Transforms/BDCE/dead-uses.ll =================================================================== --- test/Transforms/BDCE/dead-uses.ll +++ test/Transforms/BDCE/dead-uses.ll @@ -10,7 +10,7 @@ define i32 @pr39771_fshr_multi_use_instr(i32 %a) { ; CHECK-LABEL: @pr39771_fshr_multi_use_instr( ; CHECK-NEXT: [[X:%.*]] = or i32 [[A:%.*]], 0 -; CHECK-NEXT: [[B:%.*]] = tail call i32 @llvm.fshr.i32(i32 [[X]], i32 [[X]], i32 1) +; CHECK-NEXT: [[B:%.*]] = tail call i32 @llvm.fshr.i32(i32 0, i32 [[X]], i32 1) ; CHECK-NEXT: [[C:%.*]] = lshr i32 [[B]], 23 ; CHECK-NEXT: [[D:%.*]] = xor i32 [[C]], [[B]] ; CHECK-NEXT: [[E:%.*]] = and i32 [[D]], 31 @@ -28,7 +28,7 @@ define <2 x i32> @pr39771_fshr_multi_use_instr_vec(<2 x i32> %a) { ; CHECK-LABEL: @pr39771_fshr_multi_use_instr_vec( ; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[A:%.*]], zeroinitializer -; CHECK-NEXT: [[B:%.*]] = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X]], <2 x i32> [[X]], <2 x i32> ) +; CHECK-NEXT: [[B:%.*]] = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> zeroinitializer, <2 x i32> [[X]], <2 x i32> ) ; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], ; CHECK-NEXT: [[D:%.*]] = xor <2 x i32> [[C]], [[B]] ; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[D]],