Diff 93443

include/llvm/LinkAllPasses.h

Show First 20 Lines • Show All 87 Lines • ▼ Show 20 Lines	ForcePassLinking() {
(void) llvm::createDeadStoreEliminationPass();		(void) llvm::createDeadStoreEliminationPass();
(void) llvm::createDependenceAnalysisWrapperPass();		(void) llvm::createDependenceAnalysisWrapperPass();
(void) llvm::createDivergenceAnalysisPass();		(void) llvm::createDivergenceAnalysisPass();
(void) llvm::createDomOnlyPrinterPass();		(void) llvm::createDomOnlyPrinterPass();
(void) llvm::createDomPrinterPass();		(void) llvm::createDomPrinterPass();
(void) llvm::createDomOnlyViewerPass();		(void) llvm::createDomOnlyViewerPass();
(void) llvm::createDomViewerPass();		(void) llvm::createDomViewerPass();
(void) llvm::createGCOVProfilerPass();		(void) llvm::createGCOVProfilerPass();
		(void) llvm::createPDSEPass();
(void) llvm::createPGOInstrumentationGenLegacyPass();		(void) llvm::createPGOInstrumentationGenLegacyPass();
(void) llvm::createPGOInstrumentationUseLegacyPass();		(void) llvm::createPGOInstrumentationUseLegacyPass();
(void) llvm::createPGOIndirectCallPromotionLegacyPass();		(void) llvm::createPGOIndirectCallPromotionLegacyPass();
(void) llvm::createInstrProfilingLegacyPass();		(void) llvm::createInstrProfilingLegacyPass();
(void) llvm::createFunctionImportPass();		(void) llvm::createFunctionImportPass();
(void) llvm::createFunctionInliningPass();		(void) llvm::createFunctionInliningPass();
(void) llvm::createAlwaysInlinerLegacyPass();		(void) llvm::createAlwaysInlinerLegacyPass();
(void) llvm::createGlobalDCEPass();		(void) llvm::createGlobalDCEPass();
▲ Show 20 Lines • Show All 120 Lines • Show Last 20 Lines

include/llvm/Transforms/IPO/PassManagerBuilder.h

Show First 20 Lines • Show All 146 Lines • ▼ Show 20 Lines	public:
bool DisableUnrollLoops;		bool DisableUnrollLoops;
bool BBVectorize;		bool BBVectorize;
bool SLPVectorize;		bool SLPVectorize;
bool LoopVectorize;		bool LoopVectorize;
bool RerollLoops;		bool RerollLoops;
bool LoadCombine;		bool LoadCombine;
bool NewGVN;		bool NewGVN;
bool DisableGVNLoadPRE;		bool DisableGVNLoadPRE;
		bool PDSE;
bool VerifyInput;		bool VerifyInput;
bool VerifyOutput;		bool VerifyOutput;
bool MergeFunctions;		bool MergeFunctions;
bool PrepareForLTO;		bool PrepareForLTO;
bool PrepareForThinLTO;		bool PrepareForThinLTO;
bool PerformThinLTO;		bool PerformThinLTO;
bool DivergentTarget;		bool DivergentTarget;

▲ Show 20 Lines • Show All 57 Lines • Show Last 20 Lines

include/llvm/Transforms/Scalar.h

	Show First 20 Lines • Show All 69 Lines • ▼ Show 20 Lines
	//			//
	// DeadStoreElimination - This pass deletes stores that are post-dominated by			// DeadStoreElimination - This pass deletes stores that are post-dominated by
	// must-aliased stores and are not loaded used between the stores.			// must-aliased stores and are not loaded used between the stores.
	//			//
	FunctionPass *createDeadStoreEliminationPass();			FunctionPass *createDeadStoreEliminationPass();

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
				// PDSE - This pass deletes both partially and fully redundant stores.
				//
				FunctionPass *createPDSEPass();

				//===----------------------------------------------------------------------===//
				//
	// AggressiveDCE - This pass uses the SSA based Aggressive DCE algorithm. This			// AggressiveDCE - This pass uses the SSA based Aggressive DCE algorithm. This
	// algorithm assumes instructions are dead until proven otherwise, which makes			// algorithm assumes instructions are dead until proven otherwise, which makes
	// it more successful are removing non-obviously dead instructions.			// it more successful are removing non-obviously dead instructions.
	//			//
	FunctionPass *createAggressiveDCEPass();			FunctionPass *createAggressiveDCEPass();


	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	▲ Show 20 Lines • Show All 479 Lines • Show Last 20 Lines

lib/Transforms/IPO/PassManagerBuilder.cpp

Show First 20 Lines • Show All 68 Lines • ▼ Show 20 Lines

static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),		static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
cl::Hidden,		cl::Hidden,
cl::desc("Run the load combining pass"));		cl::desc("Run the load combining pass"));

static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,		static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
cl::desc("Run the NewGVN pass"));		cl::desc("Run the NewGVN pass"));

		static cl::opt<bool> RunPDSE("enable-pdse", cl::init(false), cl::Hidden,
		cl::desc("Run with PDSE instead of DSE."));
		mehdi_aminiUnsubmitted Not Done Reply Inline Actions Is "PDSE" supposed to be a well known acronym? I feel that the description could be more friendly with `--help` readers. It also sounds strange to me that "Partial DSE" is "stronger" than "DSE" alone. mehdi_amini: Is "PDSE" supposed to be a well known acronym? I feel that the description could be more…
		davideUnsubmitted Not Done Reply Inline Actions It doesn't to me, at all. See, for example, PRE (partial redundancy elimination) which is stronger than full redundancy elimination (once we agree on the meaning of stronger). davide: It doesn't to me, at all. See, for example, PRE (partial redundancy elimination) which is…
		filcabUnsubmitted Not Done Reply Inline Actions I think it helps if you look at it as (PDS)E, and not as P(DSE) :-) filcab: I think it helps if you look at it as (PDS)E, and not as P(DSE) :-)

static cl::opt<bool>		static cl::opt<bool>
RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization",		RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization",
cl::init(true), cl::Hidden,		cl::init(true), cl::Hidden,
cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "		cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
"vectorizer instead of before"));		"vectorizer instead of before"));

// Experimental option to use CFL-AA		// Experimental option to use CFL-AA
enum class CFLAAType { None, Steensgaard, Andersen, Both };		enum class CFLAAType { None, Steensgaard, Andersen, Both };
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines	PassManagerBuilder::PassManagerBuilder() {
DisableUnitAtATime = false;		DisableUnitAtATime = false;
DisableUnrollLoops = false;		DisableUnrollLoops = false;
BBVectorize = RunBBVectorization;		BBVectorize = RunBBVectorization;
SLPVectorize = RunSLPVectorization;		SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;		LoopVectorize = RunLoopVectorization;
RerollLoops = RunLoopRerolling;		RerollLoops = RunLoopRerolling;
LoadCombine = RunLoadCombine;		LoadCombine = RunLoadCombine;
NewGVN = RunNewGVN;		NewGVN = RunNewGVN;
		PDSE = RunPDSE;
DisableGVNLoadPRE = false;		DisableGVNLoadPRE = false;
VerifyInput = false;		VerifyInput = false;
VerifyOutput = false;		VerifyOutput = false;
MergeFunctions = false;		MergeFunctions = false;
PrepareForLTO = false;		PrepareForLTO = false;
EnablePGOInstrGen = RunPGOInstrGen;		EnablePGOInstrGen = RunPGOInstrGen;
PGOInstrGen = PGOOutputFile;		PGOInstrGen = PGOOutputFile;
PGOInstrUse = RunPGOInstrUse;		PGOInstrUse = RunPGOInstrUse;
▲ Show 20 Lines • Show All 169 Lines • ▼ Show 20 Lines	void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations		MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations

// Run instcombine after redundancy elimination to exploit opportunities		// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.		// opened up by them.
addInstructionCombiningPass(MPM);		addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);		addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createJumpThreadingPass()); // Thread jumps		MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());		MPM.add(createCorrelatedValuePropagationPass());
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores		MPM.add(PDSE ? createPDSEPass()
		: createDeadStoreEliminationPass()); // Delete dead stores
MPM.add(createLICMPass());		MPM.add(createLICMPass());

addExtensionsToPM(EP_ScalarOptimizerLate, MPM);		addExtensionsToPM(EP_ScalarOptimizerLate, MPM);

if (RerollLoops)		if (RerollLoops)
MPM.add(createLoopRerollPass());		MPM.add(createLoopRerollPass());
if (!RunSLPAfterLoopVectorization) {		if (!RunSLPAfterLoopVectorization) {
if (SLPVectorize)		if (SLPVectorize)
▲ Show 20 Lines • Show All 416 Lines • ▼ Show 20 Lines	void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.		PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.

PM.add(createLICMPass()); // Hoist loop invariants.		PM.add(createLICMPass()); // Hoist loop invariants.
PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.		PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
PM.add(NewGVN ? createNewGVNPass()		PM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.		: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.		PM.add(createMemCpyOptPass()); // Remove dead memcpys.

// Nuke dead stores.		PM.add(PDSE ? createPDSEPass() // Really nuke dead stores.
PM.add(createDeadStoreEliminationPass());		: createDeadStoreEliminationPass()); // Nuke dead stores.

// More loops are countable; try to optimize them.		// More loops are countable; try to optimize them.
PM.add(createIndVarSimplifyPass());		PM.add(createIndVarSimplifyPass());
PM.add(createLoopDeletionPass());		PM.add(createLoopDeletionPass());
if (EnableLoopInterchange)		if (EnableLoopInterchange)
PM.add(createLoopInterchangePass());		PM.add(createLoopInterchangePass());

if (!DisableUnrollLoops)		if (!DisableUnrollLoops)
▲ Show 20 Lines • Show All 197 Lines • Show Last 20 Lines

lib/Transforms/Scalar/PDSE.cpp

	Show All 31 Lines
	// the true block) and can be made fully redundant by inserting a copy into the			// the true block) and can be made fully redundant by inserting a copy into the
	// false block.			// false block.
	//			//
	// For a gentler introduction to PRE, see:			// For a gentler introduction to PRE, see:
	//			//
	// Partial Redundancy Elimination in SSA Form			// Partial Redundancy Elimination in SSA Form
	// https://doi.org/10.1145/319301.319348			// https://doi.org/10.1145/319301.319348
	//			//
	// Differences between the papers and this implementation:			// - TODO: Handle partial overwrite tracking during the full redundancy
	// - May-throw instructions count as killing occurrences in the factored			// elimination phase.
	// redundancy graph of escaping stores;
	// - TODO: Figure out partial overwrite tracking.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

				#include "llvm/ADT/DenseSet.h"
				#include "llvm/ADT/Statistic.h"
	#include "llvm/Analysis/AliasAnalysis.h"			#include "llvm/Analysis/AliasAnalysis.h"
				#include "llvm/Analysis/CaptureTracking.h"
	#include "llvm/Analysis/GlobalsModRef.h"			#include "llvm/Analysis/GlobalsModRef.h"
				#include "llvm/Analysis/IteratedDominanceFrontier.h"
				#include "llvm/Analysis/MemoryBuiltins.h"
	#include "llvm/Analysis/PostDominators.h"			#include "llvm/Analysis/PostDominators.h"
	#include "llvm/Analysis/TargetLibraryInfo.h"			#include "llvm/Analysis/TargetLibraryInfo.h"
				#include "llvm/Analysis/ValueTracking.h"
				#include "llvm/IR/AssemblyAnnotationWriter.h"
				#include "llvm/IR/IntrinsicInst.h"
	#include "llvm/Pass.h"			#include "llvm/Pass.h"
				#include "llvm/Support/FormattedStream.h"
	#include "llvm/Transforms/Scalar.h"			#include "llvm/Transforms/Scalar.h"
	#include "llvm/Transforms/Scalar/PDSE.h"			#include "llvm/Transforms/Scalar/PDSE.h"
				#include "llvm/Transforms/Utils/BasicBlockUtils.h"
				#include "llvm/Transforms/Utils/Local.h"
				#include "llvm/Transforms/Utils/SSAUpdater.h"

				#include <forward_list>
				#include <list>

	#define DEBUG_TYPE "pdse"			#define DEBUG_TYPE "pdse"

	using namespace llvm;			using namespace llvm;

				STATISTIC(NumStores, "Number of stores deleted");
				STATISTIC(NumPartialReds, "Number of partial redundancies converted.");

	static cl::opt<bool>			static cl::opt<bool>
	PrintFRG("print-frg", cl::init(false), cl::Hidden,			PrintFRG("print-frg", cl::init(false), cl::Hidden,
	cl::desc("Print the factored redundancy graph of stores."));			cl::desc("Print the factored redundancy graph of stores."));

	namespace {			namespace {
	bool runPDSE(Function &F, AliasAnalysis &AA, const PostDominatorTree &PDT,			// Representations of factored redundancy graph elements.
	const TargetLibraryInfo &TLI) {			enum struct OccTy {
	if (PrintFRG) {			Real,
	DEBUG(dbgs() << "TODO: Print factored redundancy graph.\n");			Lambda,
				};

				struct RealOcc;
				struct LambdaOcc;

				// Indexes PDSE.Worklist.
				using RedIdx = unsigned;

				// Indexes LambdaOcc::Flags
				using SubIdx = unsigned;

				struct Occurrence {
				unsigned ID;
				RedIdx Class;
				// ^ Index of the redundancy class that this belongs to.
				OccTy Type;

				const RealOcc *isReal() const {
				filcabUnsubmitted Not Done Reply Inline Actions `isReal`/`isLambda` sound like they return booleans. Code like `X->isReal()->isLambda()` looks weird. In my opinion, `asReal`/`asLambda` (or similar) would be easier to read (exact same behavior: If they're the type that we're asking for, return the pointer. Otherwise return `nullptr`). filcab: `isReal`/`isLambda` sound like they return booleans. Code like `X->isReal()->isLambda()` looks…
				return Type == OccTy::Real ? reinterpret_cast<const RealOcc *>(this)
				: nullptr;
				}

				const LambdaOcc *isLambda() const {
				return Type == OccTy::Lambda ? reinterpret_cast<const LambdaOcc *>(this)
				: nullptr;
				}

				RealOcc *isReal() {
				return Type == OccTy::Real ? reinterpret_cast<RealOcc *>(this) : nullptr;
				}

				LambdaOcc *isLambda() {
				return Type == OccTy::Lambda ? reinterpret_cast<LambdaOcc *>(this)
				: nullptr;
				}
				};

				struct RedClass;

				struct RealOcc final : public Occurrence {
				SubIdx Subclass;
				Instruction *Inst;
				Occurrence *Def;
				MemoryLocation KillLoc;

				RealOcc(unsigned ID, Instruction &I)
				: Occurrence{ID, -1u, OccTy::Real}, Subclass(-1u), Inst(&I), KillLoc() {}

				RealOcc(unsigned ID, Instruction &I, MemoryLocation &&KillLoc)
				: Occurrence{ID, -1u, OccTy::Real}, Subclass(-1u), Inst(&I),
				KillLoc(KillLoc) {}

				bool canDSE() const {
				if (auto *SI = dyn_cast<StoreInst>(Inst)) {
				return SI->isUnordered();
				} else if (auto *MI = dyn_cast<MemIntrinsic>(Inst)) {
				return !MI->isVolatile();
				} else {
				llvm_unreachable("Unknown real occurrence type.");
				}
				}

				RedIdx setClass(RedIdx Class_, SubIdx Subclass_) {
				Subclass = Subclass_;
				return Class = Class_;
				}

				raw_ostream &print(raw_ostream &, ArrayRef<RedClass>) const;
				};

				Value *getStoreOp(Instruction &);
				Instruction &setStoreOp(Instruction &, Value &);

				bool hasMultiplePreds(const BasicBlock &BB, bool AlreadyOnePred = true) {
				filcabUnsubmitted Not Done Reply Inline Actions This is always called without the second parameter. It seems code would be simpler if you just took a pointer as an argument. filcab: This is always called without the second parameter. It seems code would be simpler if you just…
				auto B = pred_begin(&BB);
				auto E = pred_end(&BB);
				return (AlreadyOnePred \|\| B != E) && ++B != E;
				}

				struct LambdaOcc final : public Occurrence {
				struct Operand {
				BasicBlock *Succ;
				Occurrence *Inner;

				RealOcc *hasRealUse() const { return Inner->isReal(); }

				LambdaOcc *getLambda() const {
				return Inner->isReal() ? Inner->isReal()->isLambda() : Inner->isLambda();
				filcabUnsubmitted Not Done Reply Inline Actions Why `isReal()->isLambda()`? Won't the `isLambda()` always return `nullptr`? (`Type` can't be both `OccTy::Real` and `OccTy::Lambda`) filcab: Why `isReal()->isLambda()`? Won't the `isLambda()` always return `nullptr`? (`Type` can't be…
				}

				Operand(BasicBlock &Succ, Occurrence &Inner) : Succ(&Succ), Inner(&Inner) {}
				};

				struct RealUse {
				RealOcc *Occ;

				Instruction &getInst() const { return *Occ->Inst; }
				};

				struct LambdaUse {
				LambdaOcc *L;
				size_t OpIdx;

				Operand &getOp() const { return L->Defs[OpIdx]; }

				LambdaUse(LambdaOcc &L, size_t OpIdx) : L(&L), OpIdx(OpIdx) {}
				};

				BasicBlock *Block;
				SmallVector<Operand, 4> Defs;
				SmallVector<BasicBlock *, 4> NullDefs;
				SmallVector<RealUse, 4> Uses;
				// ^ Real occurrences for which this lambda is representative (a def). Each
				// use will be in the same redundancy class as this lambda (meaning that the
				// stores they represent are same-sized and must-alias), but can have
				// different sub-class indexes (memset, memcpy, plain store, etc.).
				davideUnsubmitted Not Done Reply Inline Actions We generally put the comment above the field and not below, can you change those? davide: We generally put the comment above the field and not below, can you change those?
				SmallVector<LambdaUse, 4> LambdaUses;
				// ^ These lambdas either directly use this lambda, or use a real use of this
				// lambda. Needed to propagate `CanBeAnt` and `Earlier`.

				struct SubFlags {
				// Consult the Kennedy et al. paper for these.
				bool UpSafe;
				bool CanBeAnt;
				bool Earlier;

				SubFlags() : UpSafe(true), CanBeAnt(true), Earlier(true) {}
				};

				std::vector<SubFlags> Flags;
				// ^ Anticipation computation, indexed by subclass.

				bool upSafe(SubIdx Sub) const { return Flags[Sub].UpSafe; }

				bool canBeAnt(SubIdx Sub) const { return Flags[Sub].CanBeAnt; }

				bool earlier(SubIdx Sub) const { return Flags[Sub].Earlier; }

				LambdaOcc(unsigned ID, BasicBlock &Block, RedIdx Class,
				unsigned NumSubclasses)
				: Occurrence{ID, Class, OccTy::Lambda}, Block(&Block), Defs(), NullDefs(),
				Uses(), LambdaUses(), Flags(NumSubclasses) {}

				void addUse(RealOcc &Occ) { Uses.push_back({&Occ}); }

				void addUse(LambdaOcc &L, size_t OpIdx) { LambdaUses.emplace_back(L, OpIdx); }

				LambdaOcc &addOperand(BasicBlock &Succ, Occurrence *ReprOcc) {
				if (ReprOcc) {
				Defs.emplace_back(Succ, *ReprOcc);
				if (LambdaOcc *L = Defs.back().getLambda())
				L->addUse(*this, Defs.size() - 1);
				} else
				NullDefs.push_back(&Succ);
				return *this;
				}

				// Does this lambda have null defs that can't be filled by PRE? Yes, if:
				bool hasNullNonInsertable() const {
				// - Any of the defs reside on a critical edge that can't be split.
				return isa<IndirectBrInst>(Block->getTerminator()) &&
				any_of(NullDefs, [](const BasicBlock *Succ) {
				return hasMultiplePreds(*Succ);
				});
				// TODO: EH pads shouldn't be split either.
				}
				dberlinUnsubmitted Not Done Reply Inline Actions FWIW: DenseMap<const BasicBlock , iplist<RealOcc>> will work. Just derive RealOcc from ilist_node<RealOcc> dberlin:* FWIW: DenseMap<const BasicBlock *, iplist<RealOcc>> will work. Just derive RealOcc from…

				void resetUpSafe() {
				for (SubFlags &F : Flags)
				F.UpSafe = false;
				}

				void resetUpSafe(SubIdx Sub) { Flags[Sub].UpSafe = false; }

				void resetUpSafeExcept(SubIdx Sub_) {
				for (SubIdx Sub = 0; Sub < Flags.size(); Sub += 1)
				if (Sub_ != Sub)
				Flags[Sub].UpSafe = false;
				}

				void resetCanBeAnt(SubIdx Sub) {
				Flags[Sub].CanBeAnt = Flags[Sub].Earlier = false;
				}

				void resetEarlier(SubIdx Sub) { Flags[Sub].Earlier = false; }

				bool willBeAnt(SubIdx Sub) const {
				return Flags[Sub].CanBeAnt && !Flags[Sub].Earlier;
				}

				raw_ostream &print(raw_ostream &, ArrayRef<RedClass>, bool = false,
				SubIdx * = nullptr) const;
				};

				// Factored redundancy graph representation for each maximal group of
				// must-aliasing stores.
				struct RedClass {
				MemoryLocation Loc;
				// ^ The memory location that each RealOcc mods and must-alias.
				filcabUnsubmitted Not Done Reply Inline Actions We usually comment above the variable. filcab: We usually comment above the variable.
				SmallVector<RedIdx, 8> Overwrites;
				filcabUnsubmitted Not Done Reply Inline Actions Did you do any measurements/reasoning for these sizes, btw? (Just asking. No need to worry too much for now) filcab: Did you do any measurements/reasoning for these sizes, btw? (Just asking. No need to worry too…
				// ^ Indices of redundancy classes that this class can DSE.
				SmallVector<RedIdx, 8> Interferes;
				// ^ Indices of redundancy classes that may-alias this class.
				bool Escapes;
				// ^ Upon function unwind, can Loc escape?
				bool Returned;
				// ^ Is Loc returned by the function?
				SmallVector<LambdaOcc *, 8> Lambdas;
				std::vector<Instruction *> StoreTypes;

				DenseMap<std::pair<unsigned, Type *>, SubIdx> Subclasses;
				SmallPtrSet<BasicBlock *, 8> DefBlocks;

				RedClass(MemoryLocation Loc, bool Escapes, bool Returned)
				: Loc(std::move(Loc)), Escapes(Escapes), Returned(Returned), Lambdas(),
				StoreTypes(), Subclasses(), DefBlocks() {}

				private:
				using LambdaStack = SmallVector<LambdaOcc *, 16>;

				// All of the lambda occ refinement phases follow this depth-first structure
				// to propagate some lambda flag from an initial set to the rest of the graph.
				// Consult figures 8 and 10 of Kennedy et al.
				void depthFirst(std::function<void(LambdaOcc &, LambdaStack &)> push,
				std::function<bool(LambdaOcc &)> initial,
				std::function<bool(LambdaOcc &L)> alreadyTraversed) {
				LambdaStack Stack;

				for (LambdaOcc *L : Lambdas)
				if (initial(*L))
				push(*L, Stack);

				while (!Stack.empty()) {
				LambdaOcc &L = *Stack.pop_back_val();
				if (!alreadyTraversed(L))
				push(L, Stack);
				}
				}

				// If lambda P is repr occ to an operand of lambda Q and:
				// - Q is up-unsafe (i.e., there is a reverse path from Q to function entry
				// that doesn't cross any real occs of Q's class), and
				// - there are no real occs from P to Q,
				// then we can conclude that P is up-unsafe too. We use this to propagate
				// up-unsafety to the rest of the FRG.
				RedClass &propagateUpUnsafe(SubIdx Sub) {
				auto push = [&](LambdaOcc &L, LambdaStack &Stack) {
				L.resetUpSafe(Sub);
				for (LambdaOcc::Operand &Op : L.Defs)
				if (!Op.hasRealUse())
				if (LambdaOcc *L = Op.getLambda())
				Stack.push_back(L);
				};
				auto initialCond = [&](LambdaOcc &L) { return !L.upSafe(Sub); };
				// If the top entry of the lambda stack is up-unsafe, then it and its
				// operands already been traversed.
				auto &alreadyTraversed = initialCond;

				depthFirst(push, initialCond, alreadyTraversed);
				return *this;
				}

				RedClass &computeCanBeAnt(SubIdx Sub) {
				auto push = [&](LambdaOcc &L, LambdaStack &Stack) {
				L.resetCanBeAnt(Sub);
				for (LambdaOcc::LambdaUse &Use : L.LambdaUses)
				if (Use.L->canBeAnt(Sub) && !Use.getOp().hasRealUse() &&
				(!Use.L->upSafe(Sub) \|\|
				(isa<IndirectBrInst>(Use.L->Block->getTerminator()) &&
				hasMultiplePreds(*Use.getOp().Succ))))
				Stack.push_back(Use.L);
				};
				auto initialCond = [&](LambdaOcc &L) {
				return (!L.upSafe(Sub) && L.canBeAnt(Sub) && !L.NullDefs.empty()) \|\|
				L.hasNullNonInsertable();
				};
				auto alreadyTraversed = [&](LambdaOcc &L) { return !L.canBeAnt(Sub); };

				depthFirst(push, initialCond, alreadyTraversed);
				return *this;
				}

				RedClass &computeEarlier(SubIdx Sub) {
				auto push = [&](LambdaOcc &L, LambdaStack &Stack) {
				L.resetEarlier(Sub);
				for (LambdaOcc::LambdaUse &Use : L.LambdaUses)
				if (Use.L->earlier(Sub))
				Stack.push_back(Use.L);
				};
				auto initialCond = [&](LambdaOcc &L) {
				return L.earlier(Sub) && any_of(L.Defs, [](const LambdaOcc::Operand &Op) {
				return Op.hasRealUse();
				});
				};
				auto alreadyTraversed = [&](LambdaOcc &L) { return !L.earlier(Sub); };

				depthFirst(push, initialCond, alreadyTraversed);
				return *this;
				}

				public:
				RedClass &computeWillBeAnt() {
				if (Lambdas.size() > 0) {
				DEBUG(dbgs() << "Computing willBeAnt\n");
				for (SubIdx Sub = 0; Sub < numSubclasses(); Sub += 1)
				propagateUpUnsafe(Sub).computeCanBeAnt(Sub).computeEarlier(Sub);
				}
				return *this;
				}

				SubIdx numSubclasses() const { return StoreTypes.size(); }

				friend raw_ostream &operator<<(raw_ostream &O, const RedClass &Class) {
				return O << *Class.Loc.Ptr << " x " << Class.Loc.Size;
				}
				};

				raw_ostream &RealOcc::print(raw_ostream &O, ArrayRef<RedClass> Worklist) const {
				return ID ? (O << "Real @ " << Inst->getParent()->getName() << " ("
				<< Worklist[Class] << ") " << *Inst)
				: (O << "DeadOnExit");
				}

				raw_ostream &LambdaOcc::print(raw_ostream &O, ArrayRef<RedClass> Worklist,
				bool UsesDefs, SubIdx *Sub) const {
				O << "Lambda @ " << Block->getName() << " (" << Worklist[Class] << ")";
				if (Sub)
				dbgs() << " [" << (upSafe(*Sub) ? "U " : "!U ")
				<< (canBeAnt(Sub) ? "C " : "!C ") << (earlier(Sub) ? "E " : "!E ")
				<< (willBeAnt(*Sub) ? "W" : "!W") << "]";
				if (UsesDefs) {
				O << "\n";
				for (const LambdaOcc::RealUse &Use : Uses)
				Use.Occ->print(dbgs() << "\tUse: ", Worklist) << "\n";
				for (const LambdaOcc::Operand &Def : Defs)
				if (RealOcc *Occ = Def.hasRealUse())
				Occ->print(dbgs() << "\tDef: ", Worklist) << "\n";
				else
				Def.getLambda()->print(dbgs() << "\tDef: ", Worklist) << "\n";
				for (const BasicBlock *BB : NullDefs)
				dbgs() << "\tDef: _\|_ @ " << BB->getName() << "\n";
				}
				return O;
				}

				class EscapeTracker {
				const DataLayout &DL;
				const TargetLibraryInfo &TLI;
				DenseSet<const Value *> NonEscapes;
				DenseSet<const Value *> Returns;

				public:
				bool escapesOnUnwind(const Value *V) {
				if (NonEscapes.count(V))
				return false;
				if (isa<AllocaInst>(V) \|\|
				(isAllocLikeFn(V, &TLI) && !PointerMayBeCaptured(V, false, true))) {
				NonEscapes.insert(V);
	return false;			return false;
				}
				return true;
				}

				bool escapesOnUnwind(const MemoryLocation &Loc) {
				return escapesOnUnwind(GetUnderlyingObject(Loc.Ptr, DL));
				}

				bool returned(const Value *V) const { return Returns.count(V); }

				bool returned(const MemoryLocation &Loc) const {
				return returned(GetUnderlyingObject(Loc.Ptr, DL));
				}

				EscapeTracker(Function &F, const TargetLibraryInfo &TLI)
				: DL(F.getParent()->getDataLayout()), TLI(TLI) {
				// Record non-escaping args.
				for (Argument &Arg : F.args())
				if (Arg.hasByValOrInAllocaAttr())
				NonEscapes.insert(&Arg);

				// Record return values.
				for (BasicBlock &BB : F)
				if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
				if (Value *RetVal = RI->getReturnValue())
				Returns.insert(GetUnderlyingObject(RetVal, DL));
				}
				};

				Value *getStoreOp(Instruction &I) {
				if (auto *SI = dyn_cast<StoreInst>(&I)) {
				return SI->getValueOperand();
				} else if (auto *MI = dyn_cast<MemSetInst>(&I)) {
				return MI->getValue();
				} else if (auto *MI = dyn_cast<MemTransferInst>(&I)) {
				return MI->getRawSource();
				davideUnsubmitted Not Done Reply Inline Actions you don't need `else` after `return`. davide: you don't need `else` after `return`.
	} else {			} else {
	DEBUG(dbgs() << "Dummy PDSE pass.\n");			llvm_unreachable("Unknown real occurrence type.");
				}
				}

				Instruction &setStoreOp(Instruction &I, Value &V) {
				if (auto *SI = dyn_cast<StoreInst>(&I)) {
				SI->setOperand(0, &V);
				} else if (auto *MI = dyn_cast<MemSetInst>(&I)) {
				MI->setValue(&V);
				} else if (auto *MI = dyn_cast<MemTransferInst>(&I)) {
				MI->setSource(&V);
				} else {
				llvm_unreachable("Unknown real occurrence type.");
				}
				return I;
				}

				// If Inst has the potential to be a DSE candidate, return its write location
				// and a real occurrence wrapper.
				Optional<std::pair<MemoryLocation, RealOcc>> makeRealOcc(Instruction &I,
				unsigned &NextID) {
				using std::make_pair;
				if (auto *SI = dyn_cast<StoreInst>(&I)) {
				return make_pair(MemoryLocation::get(SI), RealOcc(NextID++, I));
				} else if (auto *MI = dyn_cast<MemSetInst>(&I)) {
				filcabUnsubmitted Not Done Reply Inline Actions Nit: We can do without the `else` in these. filcab: Nit: We can do without the `else` in these.
				return make_pair(MemoryLocation::getForDest(MI), RealOcc(NextID++, I));
				} else if (auto *MI = dyn_cast<MemTransferInst>(&I)) {
				// memmove, memcpy.
				return make_pair(MemoryLocation::getForDest(MI),
				RealOcc(NextID++, I, MemoryLocation::getForSource(MI)));
				}
				return None;
				}

				class InstOrReal {
				union {
				Instruction *I;
				RealOcc Occ;
				};
				bool IsOcc;

				public:
				InstOrReal(RealOcc &&Occ) : Occ(std::move(Occ)), IsOcc(true) {}

				InstOrReal(Instruction &I) : I(&I), IsOcc(false) {}

				const RealOcc *getOcc() const { return IsOcc ? &Occ : nullptr; }

				RealOcc *getOcc() { return IsOcc ? &Occ : nullptr; }

				Instruction *getInst() const { return IsOcc ? nullptr : I; }
				};

				struct BlockInfo {
				std::list<InstOrReal> Insts;
				std::list<LambdaOcc> Lambdas;
				};

				struct PDSE {
				Function &F;
				AliasAnalysis &AA;
				PostDominatorTree &PDT;
				const TargetLibraryInfo &TLI;

				unsigned NextID;
				DenseMap<std::pair<RedIdx, const Instruction *>, ModRefInfo> MRI;
				// ^ Caches calls to AliasAnalysis::getModRefInfo.
				DenseMap<const BasicBlock *, BlockInfo> Blocks;
				std::forward_list<Instruction *> DeadStores;
				davideUnsubmitted Not Done Reply Inline Actions why a forward list? davide: why a forward list?
				std::vector<RedClass> Worklist;
				RealOcc DeadOnExit;
				// ^ A faux occurrence used to detect stores to non-escaping memory that are
				// redundant with respect to function exit.

				PDSE(Function &F, AliasAnalysis &AA, PostDominatorTree &PDT,
				const TargetLibraryInfo &TLI)
				: F(F), AA(AA), PDT(PDT), TLI(TLI), NextID(1),
				DeadOnExit(0, *F.getEntryBlock().getTerminator()) {}

				ModRefInfo getModRefInfo(RedIdx A, const Instruction &I) {
				auto Key = std::make_pair(A, &I);
				return MRI.count(Key) ? MRI[Key]
				: (MRI[Key] = AA.getModRefInfo(&I, Worklist[A].Loc));
				}

				RedIdx classifyLoc(const MemoryLocation &Loc,
				DenseMap<MemoryLocation, RedIdx> &BelongsToClass,
				EscapeTracker &Tracker) {
				DEBUG(dbgs() << "Examining store location " << *Loc.Ptr << " x " << Loc.Size
				<< "\n");
				if (BelongsToClass.count(Loc))
				return BelongsToClass[Loc];

				std::vector<AliasResult> CachedAliases(Worklist.size());
				for (RedIdx Idx = 0; Idx < Worklist.size(); Idx += 1) {
				RedClass &Class = Worklist[Idx];
				if ((CachedAliases[Idx] = AA.alias(Class.Loc, Loc)) == MustAlias &&
				Class.Loc.Size == Loc.Size)
				return BelongsToClass[Loc] = Idx;
				}

				// Loc doesn't belong to any existing RedClass, so start a new one.
				Worklist.emplace_back(Loc, Tracker.escapesOnUnwind(Loc),
				Tracker.returned(Loc));
				RedIdx NewIdx = BelongsToClass[Worklist.back().Loc] = Worklist.size() - 1;

				// Copy must-/may-aliases into Overwrites and Interferes.
				for (RedIdx Idx = 0; Idx < CachedAliases.size(); Idx += 1) {
				if (CachedAliases[Idx] == MustAlias) {
				assert(Worklist[NewIdx].Loc.Size != Worklist[Idx].Loc.Size &&
				"Loc should have been part of redundancy class Idx.");
				if (Worklist[NewIdx].Loc.Size >= Worklist[Idx].Loc.Size)
				Worklist[NewIdx].Overwrites.push_back(Idx);
				else if (Worklist[NewIdx].Loc.Size <= Worklist[Idx].Loc.Size)
				Worklist[Idx].Overwrites.push_back(NewIdx);
				} else if (CachedAliases[Idx] != NoAlias) {
				Worklist[Idx].Interferes.push_back(NewIdx);
				Worklist[NewIdx].Interferes.push_back(Idx);
				}
				}
				return NewIdx;
				}

				struct RenameState {
				struct Incoming {
				Occurrence *ReprOcc;
				};

				std::vector<Incoming> States;

				RenameState(ArrayRef<RedClass> Worklist, RealOcc &DeadOnExit)
				: States(Worklist.size()) {
				for (RedIdx Idx = 0; Idx < Worklist.size(); Idx += 1)
				if (!Worklist[Idx].Escapes && !Worklist[Idx].Returned) {
				DEBUG(dbgs() << "Class " << Worklist[Idx] << " is dead on exit.\n");
				States[Idx] = {&DeadOnExit};
				}
				}

				bool live(RedIdx Idx) const { return States[Idx].ReprOcc; }

				LambdaOcc *exposedLambda(RedIdx Idx) const {
				return live(Idx) ? States[Idx].ReprOcc->isLambda() : nullptr;
				}

				RealOcc *exposedRepr(RedIdx Idx) const {
				return live(Idx) ? States[Idx].ReprOcc->isReal() : nullptr;
				}
				};

				void updateUpSafety(RedIdx Idx, RenameState &S) {
				if (LambdaOcc *L = S.exposedLambda(Idx)) {
				DEBUG(L->print(dbgs() << "Setting up-unsafe: ", Worklist) << "\n");
				L->resetUpSafe();
				}
				}

				void kill(RedIdx Idx, RenameState &S) {
				DEBUG(dbgs() << "Killing class " << Worklist[Idx] << "\n");
				updateUpSafety(Idx, S);
				S.States[Idx] = {nullptr};
				}

				void handleRealOcc(RealOcc &Occ, RenameState &S) {
				DEBUG(Occ.print(dbgs() << "Hit a new occ: ", Worklist) << "\n");
				if (!S.live(Occ.Class)) {
				DEBUG(dbgs() << "Setting to new repr of " << Worklist[Occ.Class] << "\n");
				S.States[Occ.Class] = RenameState::Incoming{&Occ};
				} else if (LambdaOcc *L = S.exposedLambda(Occ.Class)) {
				L->resetUpSafeExcept(Occ.Subclass);
				L->addUse(Occ);
				S.States[Occ.Class] = RenameState::Incoming{&Occ};
				}

				// Examine interactions with its store loc.
				for (RedIdx Idx : Worklist[Occ.Class].Interferes)
				updateUpSafety(Idx, S);
				for (RedIdx Idx : Worklist[Occ.Class].Overwrites)
				if (!S.live(Idx)) {
				// Any of Idx's occs post-dommed by Occ can be DSE-ed (barring some
				// intervening load that aliases Idx). Since Idx is _\|_, this occ is
				// Idx's new repr.
				DEBUG(dbgs() << "Setting to repr of smaller: " << Worklist[Idx]
				<< "\n");
				S.States[Idx] = RenameState::Incoming{&Occ};
				} else
				// Otherwise, if Idx is a lambda, this occ stomps its up-safety.
				updateUpSafety(Idx, S);

				// Find out how Occ's KillLoc, if any, interacts with incoming occ classes.
				if (Occ.KillLoc.Ptr)
				for (RedIdx Idx = 0; Idx < Worklist.size(); Idx += 1)
				if (S.live(Idx) &&
				AA.alias(Worklist[Idx].Loc, Occ.KillLoc) != NoAlias) {
				DEBUG(dbgs() << "KillLoc aliases: "
				<< AA.alias(Worklist[Idx].Loc, Occ.KillLoc) << " ");
				kill(Idx, S);
				}
				}

				void handleMayKill(Instruction &I, RenameState &S) {
				for (RedIdx Idx = 0; Idx < S.States.size(); Idx += 1)
				if (S.live(Idx) && Worklist[Idx].Escapes && I.mayThrow()) {
				kill(Idx, S);
				} else if (S.live(Idx)) {
				// TODO: Handle calls to `free` as DeadOnExit.
				ModRefInfo MRI = getModRefInfo(Idx, I);
				if (MRI & MRI_Ref)
				// Aliasing load
				kill(Idx, S);
				else if (MRI & MRI_Mod)
				// Aliasing store
				updateUpSafety(Idx, S);
				}
				}

				void dse(Instruction &I) {
				DEBUG(dbgs() << "DSE-ing " << I << " (" << I.getParent()->getName()
				<< ")\n");
				++NumStores;
				DeadStores.push_front(&I);
				}

				RenameState renameBlock(BasicBlock &BB, RenameState S) {
				DEBUG(dbgs() << "Entering block " << BB.getName() << "\n");
				// Set repr occs to lambdas, if present.
				for (LambdaOcc &L : Blocks[&BB].Lambdas)
				S.States[L.Class] = {&L};

				// Simultaneously rename and DSE in post-order.
				for (InstOrReal &I : reverse(Blocks[&BB].Insts))
				if (RealOcc *Occ = I.getOcc()) {
				if (Occ->canDSE() && S.exposedRepr(Occ->Class))
				dse(*Occ->Inst);
				else
				handleRealOcc(*Occ, S);
				} else
				// Not a real occ, but still a meminst that could kill or alias.
				handleMayKill(*I.getInst(), S);

				// Lambdas directly exposed to reverse CFG exit are up-unsafe.
				if (&BB == &BB.getParent()->getEntryBlock())
				for (RedIdx Idx = 0; Idx < S.States.size(); Idx += 1)
				updateUpSafety(Idx, S);

				// Connect to predecessor lambdas.
				for (BasicBlock *Pred : predecessors(&BB))
				for (LambdaOcc &L : Blocks[Pred].Lambdas)
				L.addOperand(BB, S.States[L.Class].ReprOcc);

				return S;
				}

				void renamePass() {
				struct Entry {
				DomTreeNode *Node;
				DomTreeNode::iterator ChildIt;
				RenameState Inner;
				};

				SmallVector<Entry, 8> Stack;
				RenameState RootState(Worklist, DeadOnExit);
				if (BasicBlock *Root = PDT.getRootNode()->getBlock())
				// Real and unique exit block.
				Stack.push_back({PDT.getRootNode(), PDT.getRootNode()->begin(),
				renameBlock(*Root, RootState)});
				else
				// Multiple exits and/or infinite loops.
				for (DomTreeNode N : PDT.getRootNode())
				Stack.push_back(
				{N, N->begin(), renameBlock(*N->getBlock(), RootState)});

				// Visit blocks in post-dom pre-order
				while (!Stack.empty()) {
				if (Stack.back().ChildIt == Stack.back().Node->end())
				Stack.pop_back();
				else {
				DomTreeNode Cur = Stack.back().ChildIt++;
				if (Cur->begin() != Cur->end())
				Stack.push_back({Cur, Cur->begin(),
				renameBlock(*Cur->getBlock(), Stack.back().Inner)});
				else
				renameBlock(*Cur->getBlock(), Stack.back().Inner);
				}
				}
				}

				using SplitEdgeMap =
				DenseMap<std::pair<BasicBlock , BasicBlock >, BasicBlock *>;

				void insertNewOccs(LambdaOcc &L, SubIdx Sub, Instruction &Ins,
				SSAUpdater &StoreVals, SplitEdgeMap &SplitBlocks) {
				auto insert = [&](BasicBlock *Succ) {
				Instruction &I =
				setStoreOp(Ins.clone(), StoreVals.GetValueAtEndOfBlock(L.Block));
				DEBUG(dbgs() << "PRE insert: " << I << " @ " << Succ->getName()
				<< " (from " << L.Block->getName() << ")\n");

				if (SplitBlocks.count({L.Block, Succ}))
				Succ = SplitBlocks[{L.Block, Succ}];
				else if (BasicBlock *Split = SplitCriticalEdge(L.Block, Succ))
				Succ = SplitBlocks[{L.Block, Succ}] = Split;
				else
				SplitBlocks[{L.Block, Succ}] = Succ;

				// Need to insert after phis.
				// - TODO: Cache insertion pos..
				// - TODO: getFirstNonPHI; cache it. and we shouldn't be breaking eh pads,
				// or inserting into catchswitches.
				BasicBlock::iterator InsPos = find_if(*Succ, [](const Instruction &I) {
				return !isa<PHINode>(&I) && !isa<LandingPadInst>(&I);
				});
				I.insertBefore(&*InsPos);
				};

				assert(L.willBeAnt(Sub) && "Can only PRE across willBeAnt lambdas.");
				for (BasicBlock *Succ : L.NullDefs)
				insert(Succ);
				for (LambdaOcc::Operand &Op : L.Defs)
				if (!Op.hasRealUse())
				if (Op.getLambda() && !Op.getLambda()->willBeAnt(Sub))
				insert(Op.Succ);
				}

				void convertPartialReds() {
				SplitEdgeMap SplitBlocks;
				for (RedClass &Class : Worklist) {
				// Determine PRE-ability of this class' lambdas.
				Class.computeWillBeAnt();

				// TODO: Iterate by lambda, not subclass.
				SSAUpdater StoreVals;
				for (SubIdx Sub = 0; Sub < Class.numSubclasses(); Sub += 1) {
				assert(getStoreOp(*Class.StoreTypes[Sub]) &&
				"Expected an analyzable store instruction.");
				StoreVals.Initialize(getStoreOp(*Class.StoreTypes[Sub])->getType(),
				Class.StoreTypes[Sub]->getName());

				// Collect all possible store operand definitions that will flow into
				// the inserted stores.
				for (LambdaOcc *L : Class.Lambdas) {
				DEBUG(L->print(dbgs() << "Analyzing ", Worklist, false, &Sub)
				<< "\n");
				if (L->willBeAnt(Sub))
				for (LambdaOcc::RealUse &Use : L->Uses) {
				if (Use.Occ->Subclass == Sub) {
				DEBUG(dbgs() << "Including " << *getStoreOp(Use.getInst())
				<< "\n");
				StoreVals.AddAvailableValue(Use.getInst().getParent(),
				getStoreOp(Use.getInst()));
				if (Use.Occ->canDSE())
				dse(Use.getInst());
				}
	}			}
				}
				for (LambdaOcc *L : Class.Lambdas) {
				if (L->willBeAnt(Sub)) {
				DEBUG(L->print(dbgs() << "Trying to PRE #" << Sub, Worklist, true,
				&Sub));
				insertNewOccs(L, Sub, Class.StoreTypes[Sub], StoreVals,
				SplitBlocks);
				}
				}
				}
				}
				}

				// Insert lambdas at reverse IDF of real occs and aliasing loads.
				void insertLambdas() {
				for (RedIdx Idx = 0; Idx < Worklist.size(); Idx += 1) {
				// Find kill-only blocks.
				for (BasicBlock &BB : F)
				for (const InstOrReal &I : Blocks[&BB].Insts) {
				Instruction *II = I.getInst();
				if ((II && II->mayThrow() && Worklist[Idx].Escapes) \|\|
				(I.getOcc() && I.getOcc()->KillLoc.Ptr &&
				AA.alias(Worklist[Idx].Loc, I.getOcc()->KillLoc) != NoAlias) \|\|
				(II && getModRefInfo(Idx, *II) & MRI_Ref)) {
				DEBUG(dbgs() << "Found kill for " << Worklist[Idx] << ": ");
				if (II)
				DEBUG(dbgs() << *II << "\n");
				else
				DEBUG(I.getOcc()->print(dbgs(), Worklist) << "\n");
				Worklist[Idx].DefBlocks.insert(&BB);
				break;
				}
				}

				// Compute lambdas.
				ReverseIDFCalculator RIDF(PDT);
				RIDF.setDefiningBlocks(Worklist[Idx].DefBlocks);
				SmallVector<BasicBlock *, 8> LambdaBlocks;
				RIDF.calculate(LambdaBlocks);

				for (BasicBlock *BB : LambdaBlocks) {
				Blocks[BB].Lambdas.emplace_back(NextID++, *BB, Idx,
				Worklist[Idx].numSubclasses());
				Worklist[Idx].Lambdas.push_back(&Blocks[BB].Lambdas.back());
				DEBUG(Blocks[BB].Lambdas.back().print(dbgs() << "Inserted ", Worklist)
				<< "\n");
				}
				}
				}

				void addRealOcc(RealOcc &&Occ, RedIdx Idx) {
				auto Key =
				std::make_pair(Occ.Inst->getOpcode(), getStoreOp(*Occ.Inst)->getType());
				auto Inserted =
				Worklist[Idx].Subclasses.insert({Key, Worklist[Idx].numSubclasses()});
				if (Inserted.second)
				Worklist[Idx].StoreTypes.push_back(Occ.Inst);
				Occ.setClass(Idx, Inserted.first->second);
				DEBUG(dbgs() << "Added real occ @ " << Occ.Inst->getParent()->getName()
				<< " " << *Occ.Inst << "\n\tto subclass "
				<< *Worklist[Idx].StoreTypes[Inserted.first->second]
				<< "\n\tof " << Worklist[Idx] << "\n");

				BasicBlock *BB = Occ.Inst->getParent();
				Worklist[Idx].DefBlocks.insert(BB);
				Blocks[BB].Insts.emplace_back(std::move(Occ));
				}

				// Collect real occs and track their basic blocks.
				void collectOccurrences() {
				EscapeTracker Tracker(F, TLI);
				DenseMap<MemoryLocation, RedIdx> BelongsToClass;

				for (BasicBlock &BB : F)
				for (Instruction &I : BB)
				if (auto LocOcc = makeRealOcc(I, NextID)) {
				// Found a real occ for this instruction. Figure out which redundancy
				// class its store locbelongs to.
				RedIdx Idx = classifyLoc(LocOcc->first, BelongsToClass, Tracker);
				addRealOcc(std::move(LocOcc->second), Idx);
				} else if (AA.getModRefInfo(&I))
				Blocks[&BB].Insts.emplace_back(I);
				}

				bool run() {
				if (!PDT.getRootNode()) {
				DEBUG(dbgs() << "FIXME: ran into the PDT bug. nothing we can do.\n");
				davideUnsubmitted Not Done Reply Inline Actions Can you please describe the `PDT` bug? I'm not really familiar with it (or I call it with another name :) davide: Can you please describe the `PDT` bug? I'm not really familiar with it (or I call it with…
	return false;			return false;
				filcabUnsubmitted Not Done Reply Inline Actions Should we assert? filcab: Should we assert?
	}			}

				collectOccurrences();
				insertLambdas();
				renamePass();
				convertPartialReds();

				// DSE.
				while (!DeadStores.empty()) {
				Instruction *Dead = DeadStores.front();
				DeadStores.pop_front();
				for (Use &U : Dead->operands()) {
				Instruction *Op = dyn_cast<Instruction>(U);
				U.set(nullptr);
				if (Op && isInstructionTriviallyDead(Op, &TLI))
				DeadStores.push_front(Op);
				}
				Dead->eraseFromParent();
				}

				return true;
				}
				};

	class PDSELegacyPass : public FunctionPass {			class PDSELegacyPass : public FunctionPass {
	public:			public:
	PDSELegacyPass() : FunctionPass(ID) {			PDSELegacyPass() : FunctionPass(ID) {
	initializePDSELegacyPassPass(*PassRegistry::getPassRegistry());			initializePDSELegacyPassPass(*PassRegistry::getPassRegistry());
	}			}

	bool runOnFunction(Function &F) override {			bool runOnFunction(Function &F) override {
	if (skipFunction(F))			if (skipFunction(F))
	return false;			return false;

	return runPDSE(F, getAnalysis<AAResultsWrapperPass>().getAAResults(),			return PDSE(F, getAnalysis<AAResultsWrapperPass>().getAAResults(),
	getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(),			getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(),
	getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());			getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())
				.run();
	}			}

	void getAnalysisUsage(AnalysisUsage &AU) const override {			void getAnalysisUsage(AnalysisUsage &AU) const override {
	AU.addRequired<PostDominatorTreeWrapperPass>();			AU.addRequired<PostDominatorTreeWrapperPass>();
	AU.addRequired<AAResultsWrapperPass>();			AU.addRequired<AAResultsWrapperPass>();
	AU.addRequired<TargetLibraryInfoWrapperPass>();			AU.addRequired<TargetLibraryInfoWrapperPass>();

	AU.setPreservesCFG();			AU.setPreservesCFG();
	AU.addPreserved<PostDominatorTreeWrapperPass>();
	AU.addPreserved<GlobalsAAWrapperPass>();			AU.addPreserved<GlobalsAAWrapperPass>();
	}			}

	static char ID; // Pass identification, replacement for typeid			static char ID; // Pass identification, replacement for typeid
	};			};
	} // end anonymous namespace			} // end anonymous namespace

	char PDSELegacyPass::ID = 0;			char PDSELegacyPass::ID = 0;

	INITIALIZE_PASS_BEGIN(PDSELegacyPass, "pdse", "Partial Dead Store Elimination",			INITIALIZE_PASS_BEGIN(PDSELegacyPass, "pdse", "Partial Dead Store Elimination",
	false, false)			false, false)
	INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)			INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
	INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)			INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
	INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)			INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)			INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
	INITIALIZE_PASS_END(PDSELegacyPass, "pdse", "Partial Dead Store Elimination",			INITIALIZE_PASS_END(PDSELegacyPass, "pdse", "Partial Dead Store Elimination",
	false, false)			false, false)

	namespace llvm {			namespace llvm {
	PreservedAnalyses PDSEPass::run(Function &F, FunctionAnalysisManager &AM) {			PreservedAnalyses PDSEPass::run(Function &F, FunctionAnalysisManager &AM) {
	if (!runPDSE(F, AM.getResult<AAManager>(F),			if (!PDSE(F, AM.getResult<AAManager>(F),
	AM.getResult<PostDominatorTreeAnalysis>(F),			AM.getResult<PostDominatorTreeAnalysis>(F),
	AM.getResult<TargetLibraryAnalysis>(F)))			AM.getResult<TargetLibraryAnalysis>(F))
				.run())
	return PreservedAnalyses::all();			return PreservedAnalyses::all();

	PreservedAnalyses PA;			PreservedAnalyses PA;
	PA.preserveSet<CFGAnalyses>();			PA.preserveSet<CFGAnalyses>();
	PA.preserve<PostDominatorTreeAnalysis>();
	PA.preserve<GlobalsAA>();			PA.preserve<GlobalsAA>();
	return PA;			return PA;
	}			}

				FunctionPass *createPDSEPass() { return new PDSELegacyPass(); }
	} // end namespace llvm			} // end namespace llvm

				// vim: set shiftwidth=2 tabstop=2:
				filcabUnsubmitted Not Done Reply Inline Actions Please remove editor-related markup. filcab: Please remove editor-related markup.

test/Transforms/DeadStoreElimination/pdse.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
				; RUN: opt -S -basicaa -pdse %s \| FileCheck %s

				declare void @may_throw()
				declare noalias i8* @malloc(i32)
				declare void @llvm.memcpy.p0i8.p0i8.i64(i8, i8, i64 , i32 , i1 )
				declare void @llvm.memmove.p0i8.p0i8.i64(i8, i8, i64, i32, i1)
				declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1)

				define void @lo_and_chow(i8* %x, i1 %br0, i1 %br1) {
				; CHECK-LABEL: @lo_and_chow(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: [[V:%.]] = load i8, i8 [[X:%.*]]
				; CHECK-NEXT: [[V1:%.*]] = add nuw i8 [[V]], 1
				; CHECK-NEXT: br label [[BB1:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: [[TMP0:%.]] = phi i8 [ [[V1]], [[BB3:%.]] ], [ [[V1]], [[BB0:%.*]] ]
				; CHECK-NEXT: br i1 [[BR0:%.]], label [[BB2:%.]], label [[BB3]]
				; CHECK: bb2:
				; CHECK-NEXT: store i8 [[TMP0]], i8* [[X]]
				; CHECK-NEXT: [[T:%.]] = load i8, i8 [[X]]
				; CHECK-NEXT: br label [[BB3]]
				; CHECK: bb3:
				; CHECK-NEXT: br i1 [[BR1:%.]], label [[BB1]], label [[EXIT:%.]]
				; CHECK: exit:
				; CHECK-NEXT: store i8 [[V1]], i8* [[X]]
				; CHECK-NEXT: ret void
				;
				bb0:
				%v = load i8, i8* %x
				%v1 = add nuw i8 %v, 1
				store i8 %v1, i8* %x
				br label %bb1
				bb1:
				br i1 %br0, label %bb2, label %bb3
				bb2:
				%t = load i8, i8* %x
				br label %bb3
				bb3:
				store i8 %v1, i8* %x
				br i1 %br1, label %bb1, label %exit
				exit:
				ret void
				}

				define void @lo_and_chow_maythrow(i8* %x, i1 %br0, i1 %br1) {
				; CHECK-LABEL: @lo_and_chow_maythrow(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: [[V:%.]] = load i8, i8 [[X:%.*]]
				; CHECK-NEXT: [[V1:%.*]] = add nuw i8 [[V]], 1
				; CHECK-NEXT: br label [[BB1:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: [[TMP0:%.]] = phi i8 [ [[V1]], [[BB3:%.]] ], [ [[V1]], [[BB0:%.*]] ]
				; CHECK-NEXT: br i1 [[BR0:%.]], label [[BB2:%.]], label [[BB3]]
				; CHECK: bb2:
				; CHECK-NEXT: store i8 [[TMP0]], i8* [[X]]
				; CHECK-NEXT: call void @may_throw()
				; CHECK-NEXT: br label [[BB3]]
				; CHECK: bb3:
				; CHECK-NEXT: br i1 [[BR1:%.]], label [[BB1]], label [[EXIT:%.]]
				; CHECK: exit:
				; CHECK-NEXT: store i8 [[V1]], i8* [[X]]
				; CHECK-NEXT: ret void
				;
				bb0:
				%v = load i8, i8* %x
				%v1 = add nuw i8 %v, 1
				store i8 %v1, i8* %x
				br label %bb1
				bb1:
				br i1 %br0, label %bb2, label %bb3
				bb2:
				call void @may_throw()
				br label %bb3
				bb3:
				store i8 %v1, i8* %x
				br i1 %br1, label %bb1, label %exit
				exit:
				ret void
				}

				; demos the self-loop problem in post-dom tree.
				; define void @f(i8* %x) {
				; a:
				; store i8 0, i8* %x
				; switch i8 0, label %b [
				; i8 1, label %c
				; ]
				; b:
				; store i8 1, i8* %x
				; br label %b
				; c:
				; store i8 2, i8* %x
				; br label %d
				; d:
				; br label %d
				; e:
				; store i8 3, i8* %x
				; ret void
				; }
				;
				; define void @g(i8* %a, i8* %b) {
				; bb0:
				; store i8 undef, i8* %b
				; store i8 undef, i8* %a
				; br i1 undef, label %bb1, label %bb2
				; bb1:
				; %tmp0 = load i8, i8* %a
				; ret void
				; bb2:
				; store i8 undef, i8* %a
				; ret void
				; }

				; define void @i(i8* noalias %x, i8* noalias %y, i1 %z) {
				; %whatever = load i8, i8* %x
				; br label %nextblock
				;
				; nextblock:
				; store i8 %whatever, i8* %x
				; store i8 123, i8* %x
				; br i1 %z, label %nextblock, label %fin
				;
				; fin:
				; ret void
				; }

				define i8* @j(i8* %a, i8* %e, i1 %c) {
				; CHECK-LABEL: @j(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: [[P:%.]] = tail call i8 @malloc(i32 4)
				; CHECK-NEXT: br i1 [[C:%.]], label [[BB1:%.]], label [[BB2:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[A:%.]], i8 nonnull [[E:%.*]], i64 64, i32 8, i1 false)
				; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A]], i8 undef, i64 32, i32 8, i1 false)
				; CHECK-NEXT: [[X:%.]] = bitcast i8 [[A]] to i64*
				; CHECK-NEXT: [[Z:%.]] = getelementptr i64, i64 [[X]], i64 1
				; CHECK-NEXT: store i64 undef, i64* [[Z]]
				; CHECK-NEXT: store i8 undef, i8* [[A]]
				; CHECK-NEXT: call void @may_throw()
				; CHECK-NEXT: store i8 0, i8* [[P]]
				; CHECK-NEXT: store i8 undef, i8* [[A]]
				; CHECK-NEXT: br label [[BB3:%.*]]
				; CHECK: bb2:
				; CHECK-NEXT: br label [[BB3]]
				; CHECK: bb3:
				; CHECK-NEXT: ret i8* [[P]]
				;
				bb0:
				%b = alloca i8
				%P = tail call i8* @malloc(i32 4)
				br i1 %c, label %bb1, label %bb2
				bb1:
				call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* nonnull %a, i64 64, i32 8, i1 false)
				call void @llvm.memmove.p0i8.p0i8.i64(i8* %a, i8* nonnull %e, i64 64, i32 8, i1 false)
				call void @llvm.memset.p0i8.i64(i8* %a, i8 undef, i64 32, i32 8, i1 false)
				%x = bitcast i8* %a to i64*
				%z = getelementptr i64, i64* %x, i64 1
				store i8 undef, i8* %a
				store i64 undef, i64* %z
				store i8 undef, i8* %a
				; ^ future full elim phase should kill this
				store i8 4, i8* %P
				call void @may_throw()
				store i8 0, i8* %P
				store i8 undef, i8* %a
				br label %bb3
				bb2:
				br label %bb3
				bb3:
				ret i8* %P
				}

				define void @aliasing_load_kills(i8* %a) {
				; CHECK-LABEL: @aliasing_load_kills(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: br label [[BB1:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: br i1 undef, label [[BB2:%.]], label [[BB3:%.]]
				; CHECK: bb2:
				; CHECK-NEXT: store i8 undef, i8* [[A:%.*]]
				; CHECK-NEXT: br label [[BB4:%.*]]
				; CHECK: bb3:
				; CHECK-NEXT: store i8 undef, i8* [[A]]
				; CHECK-NEXT: [[X:%.]] = load i8, i8 [[A]]
				; CHECK-NEXT: store i8 undef, i8* [[A]]
				; CHECK-NEXT: br label [[BB4]]
				; CHECK: bb4:
				; CHECK-NEXT: ret void
				;
				bb0:
				store i8 undef, i8* %a
				br label %bb1
				bb1:
				store i8 undef, i8* %a
				br i1 undef, label %bb2, label %bb3
				bb2:
				store i8 undef, i8* %a
				br label %bb4
				bb3:
				%x = load i8, i8* %a
				store i8 undef, i8* %a
				br label %bb4
				bb4:
				ret void
				}

				define void @memcpy_example(i8* %a, i8* noalias %b, i1 %br0) {
				; CHECK-LABEL: @memcpy_example(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: br i1 [[BR0:%.]], label [[BB1:%.]], label [[BB2:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[A:%.]], i8 [[B:%.*]], i64 64, i32 8, i1 false)
				; CHECK-NEXT: br label [[BB3:%.*]]
				; CHECK: bb2:
				; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[A]], i8* [[B]], i64 64, i32 8, i1 false)
				; CHECK-NEXT: br label [[BB3]]
				; CHECK: bb3:
				; CHECK-NEXT: ret void
				;
				bb0:
				call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 64, i32 8, i1 false)
				br i1 %br0, label %bb1, label %bb2
				bb1:
				call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 64, i32 8, i1 false)
				br label %bb3
				bb2:
				br label %bb3
				bb3:
				ret void
				}

				; http://i.imgur.com/abuFdZ2.png
				filcabUnsubmitted Not Done Reply Inline Actions I'd rather have an ASCII diagram instead of relying on an external image service (also hard to look at when reading tests). filcab: I'd rather have an ASCII diagram instead of relying on an external image service (also hard to…
				define void @multiple_pre(i8* %a, i8 %b, i1 %c, i1 %d) {
				; CHECK-LABEL: @multiple_pre(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: [[R:%.]] = add i8 [[B:%.]], 1
				; CHECK-NEXT: br i1 [[C:%.]], label [[BB1:%.]], label [[BB2:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: [[S:%.*]] = add i8 [[R]], 2
				; CHECK-NEXT: br label [[BB3:%.*]]
				; CHECK: bb2:
				; CHECK-NEXT: br label [[BB3]]
				; CHECK: bb3:
				; CHECK-NEXT: [[TMP0:%.*]] = phi i8 [ [[R]], [[BB2]] ], [ [[S]], [[BB1]] ]
				; CHECK-NEXT: br i1 [[D:%.]], label [[BB4:%.]], label [[BB5:%.*]]
				; CHECK: bb4:
				; CHECK-NEXT: store i8 [[R]], i8* [[A:%.*]]
				; CHECK-NEXT: br label [[EX:%.*]]
				; CHECK: bb5:
				; CHECK-NEXT: store i8 [[TMP0]], i8* [[A]]
				; CHECK-NEXT: br label [[EX]]
				; CHECK: ex:
				; CHECK-NEXT: ret void
				;
				bb0:
				%r = add i8 %b, 1
				br i1 %c, label %bb1, label %bb2
				bb1:
				%s = add i8 %r, 2
				store i8 %s, i8* %a
				br label %bb3
				bb2:
				store i8 %r, i8* %a
				br label %bb3
				bb3:
				br i1 %d, label %bb4, label %bb5
				bb4:
				store i8 %r, i8* %a
				br label %ex
				bb5:
				br label %ex
				ex:
				ret void
				}

				; PRE insertion can't happen at the bb3 lambda because its uses, despite
				; belonging to the same redundancy class, are altogether different instruction
				; types. PDSE handles this by partitioning each redundancy class into
				; like-opcode subclasses, and calculating anticipation separately for each.
				;
				; The i8* %a class below comprises of memset and store subclasses, and the bb3
				; lambda counts as up-unsafe (and ultimately not willBeAnt) for both because:
				; For the store subclass, bb3 is exposed to a memset (which counts as an
				; aliasing, non-PRE-insertable occurrence), and vice versa for the memset
				; subclass.
				define void @unable_to_elim(i8* %a, i8 %b, i1 %c, i1 %d) {
				; CHECK-LABEL: @unable_to_elim(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: [[R:%.]] = add i8 [[B:%.]], 1
				; CHECK-NEXT: br i1 [[C:%.]], label [[BB1:%.]], label [[BB2:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: [[S:%.*]] = add i8 [[R]], 2
				; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A:%.*]], i8 [[S]], i64 1, i32 1, i1 false)
				; CHECK-NEXT: br label [[BB3:%.*]]
				; CHECK: bb2:
				; CHECK-NEXT: store i8 [[R]], i8* [[A]]
				; CHECK-NEXT: br label [[BB3]]
				; CHECK: bb3:
				; CHECK-NEXT: br i1 [[D:%.]], label [[BB4:%.]], label [[BB5:%.*]]
				; CHECK: bb4:
				; CHECK-NEXT: store i8 [[R]], i8* [[A]]
				; CHECK-NEXT: br label [[EX:%.*]]
				; CHECK: bb5:
				; CHECK-NEXT: br label [[EX]]
				; CHECK: ex:
				; CHECK-NEXT: ret void
				;
				bb0:
				%r = add i8 %b, 1
				br i1 %c, label %bb1, label %bb2
				bb1:
				%s = add i8 %r, 2
				call void @llvm.memset.p0i8.i64(i8* %a, i8 %s, i64 1, i32 1, i1 false)
				br label %bb3
				bb2:
				store i8 %r, i8* %a
				br label %bb3
				bb3:
				br i1 %d, label %bb4, label %bb5
				bb4:
				store i8 %r, i8* %a
				br label %ex
				bb5:
				br label %ex
				ex:
				ret void
				}

				; FIXME: PDSE-ing
				;
				; a, b, c --- lambda ---
				; \|
				; +------- c, b, a
				;
				; into:
				;
				; --- lambda --- a, b, c
				; \|
				; +------- c, b, a
				;
				; would require multiple rounds of computeWillBeAnt.
				define void @pre_blocked(i8* %a, i8* %b, i8* %c, i1 %br0) {
				; CHECK-LABEL: @pre_blocked(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: store i8 1, i8* [[A:%.*]]
				; CHECK-NEXT: store i8 1, i8* [[B:%.*]]
				; CHECK-NEXT: br i1 [[BR0:%.]], label [[BB1:%.]], label [[BB2:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: store i8 1, i8* [[C:%.*]]
				; CHECK-NEXT: br label [[BB3:%.*]]
				; CHECK: bb2:
				; CHECK-NEXT: store i8 11, i8* [[C]]
				; CHECK-NEXT: store i8 11, i8* [[B]]
				; CHECK-NEXT: store i8 11, i8* [[A]]
				; CHECK-NEXT: br label [[BB3]]
				; CHECK: bb3:
				; CHECK-NEXT: ret void
				;
				bb0:
				store i8 1, i8* %a
				store i8 1, i8* %b
				store i8 1, i8* %c
				br i1 %br0, label %bb1, label %bb2
				bb1:
				br label %bb3
				bb2:
				store i8 11, i8* %c
				store i8 11, i8* %b
				store i8 11, i8* %a
				br label %bb3
				bb3:
				ret void
				}

				; FIXME: Should transform this:
				;
				; s, s' --- lambda ---
				; \|
				; +------- s
				;
				; into:
				;
				; --- lambda --- s, s'
				; \|
				; +------- s', s
				define void @pre_blocked_again(i64* %a, i1 %br0) {
				; CHECK-LABEL: @pre_blocked_again(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: store i64 1, i64* [[A:%.*]]
				; CHECK-NEXT: [[X:%.]] = bitcast i64 [[A]] to i8*
				; CHECK-NEXT: [[B:%.]] = getelementptr i8, i8 [[X]], i64 1
				; CHECK-NEXT: store i8 2, i8* [[B]]
				; CHECK-NEXT: br i1 [[BR0:%.]], label [[BB1:%.]], label [[BB2:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: store i64 1, i64* [[A]]
				; CHECK-NEXT: br label [[BB3:%.*]]
				; CHECK: bb2:
				; CHECK-NEXT: br label [[BB3]]
				; CHECK: bb3:
				; CHECK-NEXT: ret void
				;
				bb0:
				store i64 1, i64* %a
				%x = bitcast i64* %a to i8*
				%b = getelementptr i8, i8* %x, i64 1
				store i8 2, i8* %b
				br i1 %br0, label %bb1, label %bb2
				bb1:
				store i64 1, i64* %a
				br label %bb3
				bb2:
				br label %bb3
				bb3:
				ret void
				}

				define void @never_escapes() {
				; CHECK-LABEL: @never_escapes(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: br label [[BB1:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: br label [[BB2:%.*]]
				; CHECK: bb2:
				; CHECK-NEXT: ret void
				;
				bb0:
				%a = alloca i8
				br label %bb1
				bb1:
				store i8 12, i8* %a
				br label %bb2
				bb2:
				ret void
				}

				define void @propagate_up_unsafety(i8* %a, i1 %br0, i1 %br1, i1 %br2) {
				; CHECK-LABEL: @propagate_up_unsafety(
				; CHECK-NEXT: entry:
				; CHECK-NEXT: br i1 [[BR2:%.]], label [[BB5:%.]], label [[BB6:%.*]]
				; CHECK: bb5:
				; CHECK-NEXT: br label [[BB0:%.*]]
				; CHECK: bb6:
				; CHECK-NEXT: br label [[BB0]]
				; CHECK: bb0:
				; CHECK-NEXT: br i1 [[BR0:%.]], label [[BB1:%.]], label [[BB2:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: br i1 [[BR1:%.]], label [[BB3:%.]], label [[BB4:%.*]]
				; CHECK: bb2:
				; CHECK-NEXT: store i8 1, i8* [[A:%.*]]
				; CHECK-NEXT: ret void
				; CHECK: bb3:
				; CHECK-NEXT: br label [[BB0]]
				; CHECK: bb4:
				; CHECK-NEXT: store i8 3, i8* [[A]]
				; CHECK-NEXT: ret void
				;
				entry:
				br i1 %br2, label %bb5, label %bb6
				bb5:
				br label %bb0
				bb6:
				store i8 12, i8* %a
				br label %bb0
				bb0:
				br i1 %br0, label %bb1, label %bb2
				bb1:
				br i1 %br1, label %bb3, label %bb4
				bb2:
				store i8 1, i8* %a
				ret void
				bb3:
				br label %bb0
				bb4:
				store i8 3, i8* %a
				ret void
				}

				define void @multiple_insertions(i8* %a, i32 %br0) {
				; CHECK-LABEL: @multiple_insertions(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: switch i32 [[BR0:%.]], label [[BB1:%.]] [
				; CHECK-NEXT: i32 1, label [[BB2:%.*]]
				; CHECK-NEXT: i32 2, label [[BB3:%.*]]
				; CHECK-NEXT: ]
				; CHECK: bb1:
				; CHECK-NEXT: store i8 12, i8* [[A:%.*]]
				; CHECK-NEXT: br label [[BB4:%.*]]
				; CHECK: bb2:
				; CHECK-NEXT: store i8 12, i8* [[A]]
				; CHECK-NEXT: br label [[BB4]]
				; CHECK: bb3:
				; CHECK-NEXT: store i8 12, i8* [[A]]
				; CHECK-NEXT: br label [[BB4]]
				; CHECK: bb4:
				; CHECK-NEXT: ret void
				;
				bb0:
				store i8 12, i8* %a
				switch i32 %br0, label %bb1 [
				i32 1, label %bb2
				i32 2, label %bb3
				]
				bb1:
				store i8 12, i8* %a
				br label %bb4
				bb2:
				br label %bb4
				bb3:
				br label %bb4
				bb4:
				ret void
				}

				; The lambda at bb1 can be PRE-ed, but the insertion comes from behind an
				; earlier lambda -- the one at bb0.
				define void @propagate_uses(i8* %a, i1 %br0, i1 %br1) {
				; CHECK-LABEL: @propagate_uses(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: br i1 [[BR0:%.]], label [[BB1:%.]], label [[BB2:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: br i1 [[BR0]], label [[BB3:%.]], label [[BB1_BB4_CRIT_EDGE:%.]]
				; CHECK: bb1.bb4_crit_edge:
				; CHECK-NEXT: store i8 1, i8* [[A:%.*]]
				; CHECK-NEXT: br label [[BB4:%.*]]
				; CHECK: bb2:
				; CHECK-NEXT: store i8 1, i8* [[A]]
				; CHECK-NEXT: br label [[BB5:%.*]]
				; CHECK: bb3:
				; CHECK-NEXT: store i8 2, i8* [[A]]
				; CHECK-NEXT: br label [[BB4]]
				; CHECK: bb4:
				; CHECK-NEXT: br label [[BB5]]
				; CHECK: bb5:
				; CHECK-NEXT: ret void
				;
				bb0:
				store i8 1, i8* %a
				br i1 %br0, label %bb1, label %bb2
				bb1:
				br i1 %br0, label %bb3, label %bb4
				bb2:
				br label %bb5
				bb3:
				store i8 2, i8* %a
				br label %bb4
				bb4:
				br label %bb5
				bb5:
				ret void
				}

				; Check that renaming rules handle overwrites correctly.
				define void @small_store_can_dse(i8, i8 noalias) {
				; CHECK-LABEL: @small_store_can_dse(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: [[TMP2:%.]] = getelementptr inbounds i8, i8 [[TMP0:%.*]], i64 2
				; CHECK-NEXT: [[TMP3:%.]] = load i8, i8 [[TMP2]]
				; CHECK-NEXT: [[TMP4:%.]] = bitcast i8 [[TMP0]] to i64*
				; CHECK-NEXT: store i64 3, i64* [[TMP4]]
				; CHECK-NEXT: br label [[BB1:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: [[TMP5:%.]] = load i8, i8 [[TMP0]]
				; CHECK-NEXT: [[TMP6:%.]] = load i8, i8 [[TMP2]]
				; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1:%.*]], i64 8, i32 1, i1 false)
				; CHECK-NEXT: ret void
				;
				bb0:
				store i8 1, i8* %0
				%2 = getelementptr inbounds i8, i8* %0, i64 2
				%3 = load i8, i8* %2
				%4 = bitcast i8* %0 to i64*
				store i64 3, i64* %4
				br label %bb1
				bb1:
				%5 = load i8, i8* %0
				store i8 1, i8* %0
				%6 = load i8, i8* %2
				call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 8, i32 1, i1 false)
				ret void
				}

				; Nothing should be inserted. The i64 store should be used by the lambda in bb0.
				define void @no_extra_insert(i8* %a, i1 %br0) {
				; CHECK-LABEL: @no_extra_insert(
				; CHECK-NEXT: bb0:
				; CHECK-NEXT: br i1 [[BR0:%.]], label [[BB1:%.]], label [[BB2:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: [[B:%.]] = bitcast i8 [[A:%.]] to i64
				; CHECK-NEXT: store i64 2, i64* [[B]]
				; CHECK-NEXT: br label [[EXIT:%.*]]
				; CHECK: bb2:
				; CHECK-NEXT: store i8 3, i8* [[A]]
				; CHECK-NEXT: br label [[EXIT]]
				; CHECK: exit:
				; CHECK-NEXT: ret void
				;
				bb0:
				store i8 1, i8* %a
				br i1 %br0, label %bb1, label %bb2
				bb1:
				%b = bitcast i8* %a to i64*
				store i64 2, i64* %b
				br label %exit
				bb2:
				store i8 3, i8* %a
				br label %exit
				exit:
				ret void
				}

				; No DSE because the memmove doubles as an aliasing load.
				define void @kills_own_occ_class(i8* %a, i8* %b) {
				; CHECK-LABEL: @kills_own_occ_class(
				; CHECK-NEXT: store i8 3, i8* [[A:%.*]]
				; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[A]], i8* nonnull [[B:%.*]], i64 1, i32 8, i1 false)
				; CHECK-NEXT: ret void
				;
				store i8 3, i8* %a
				call void @llvm.memmove.p0i8.p0i8.i64(i8* %a, i8* nonnull %b, i64 1, i32 8, i1 false)
				ret void
				}

				; i1* %tmp and i8* %arg belong to the same redundancy class, but have different
				; types. So subclasses need to be grouped by the store value operand type in
				; addition to opcode.
				define void @subclass_on_store_value_type_too(i8* %arg) {
				; CHECK-LABEL: @subclass_on_store_value_type_too(
				; CHECK-NEXT: bb:
				; CHECK-NEXT: [[TMP:%.]] = bitcast i8 [[ARG:%.]] to i1
				; CHECK-NEXT: br label [[BB1:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2:%.*]]
				; CHECK: bb2:
				; CHECK-NEXT: store i1 undef, i1* [[TMP]]
				; CHECK-NEXT: ret void
				;
				bb:
				store i8 -123, i8* %arg
				%tmp = bitcast i8* %arg to i1*
				br label %bb1
				bb1:
				store i1 undef, i1* %tmp
				br i1 undef, label %bb1, label %bb2
				bb2:
				ret void
				}

				define void @dont_include_nonsubclass_uses(i8* %arg) {
				; CHECK-LABEL: @dont_include_nonsubclass_uses(
				; CHECK-NEXT: bb:
				; CHECK-NEXT: br label [[BB1:%.*]]
				; CHECK: bb1:
				; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2:%.*]]
				; CHECK: bb2:
				; CHECK-NEXT: store i8 -123, i8* [[ARG:%.*]]
				; CHECK-NEXT: ret void
				;
				bb:
				%tmp = bitcast i8* %arg to i1*
				br label %bb1
				bb1:
				store i1 undef, i1* %tmp
				br i1 undef, label %bb1, label %bb2
				bb2:
				store i8 -123, i8* %arg
				ret void
				}

				; The critical edge from the lambda at bb4 to bb6 is an unsplittable, which
				; prevents PRE from filling its corresponding null def. PDSE therefore
				; classifies bb4 as `CanBeAnt == false`.
				define void @cant_split_indirectbr_edge() {
				; CHECK-LABEL: @cant_split_indirectbr_edge(
				; CHECK-NEXT: bb:
				; CHECK-NEXT: indirectbr i8* undef, [label [[BB2:%.*]], label %bb6]
				; CHECK: bb2:
				; CHECK-NEXT: indirectbr i8* undef, [label %bb3]
				; CHECK: bb3:
				; CHECK-NEXT: indirectbr i8* undef, [label [[BB4:%.*]], label %bb3]
				; CHECK: bb4:
				; CHECK-NEXT: store float undef, float* undef, align 1
				; CHECK-NEXT: indirectbr i8* undef, [label [[BB2]], label %bb6]
				; CHECK: bb6:
				; CHECK-NEXT: ret void
				;
				bb:
				indirectbr i8* undef, [label %bb2, label %bb6]
				bb2:
				indirectbr i8* undef, [label %bb3]
				bb3:
				store float undef, float* undef, align 1
				indirectbr i8* undef, [label %bb4, label %bb3]
				bb4:
				indirectbr i8* undef, [label %bb2, label %bb6]
				bb6:
				ret void
				}

				; Same as cant_split_indirectbr_edge, but with a lambda use.
				define void @cant_split_indirectbr_edge2() {
				; CHECK-LABEL: @cant_split_indirectbr_edge2(
				; CHECK-NEXT: bb:
				; CHECK-NEXT: indirectbr i8* undef, [label [[BB2:%.]], label [[BB4:%.]], label %bb6]
				; CHECK: bb2:
				; CHECK-NEXT: indirectbr i8* undef, [label %bb3]
				; CHECK: bb3:
				; CHECK-NEXT: store float undef, float* undef, align 1
				; CHECK-NEXT: indirectbr i8* undef, [label [[BB4]], label %bb3]
				; CHECK: bb4:
				; CHECK-NEXT: indirectbr i8* undef, [label [[BB2]], label %bb6]
				; CHECK: bb6:
				; CHECK-NEXT: ret void
				;
				bb:
				indirectbr i8* undef, [label %bb2, label %bb4, label %bb6]
				bb2:
				indirectbr i8* undef, [label %bb3]
				bb3:
				store float undef, float* undef, align 1
				indirectbr i8* undef, [label %bb4, label %bb3]
				bb4:
				indirectbr i8* undef, [label %bb2, label %bb6]
				bb6:
				ret void
				}

				define void @cant_split2(i8* %arg) {
				; CHECK-LABEL: @cant_split2(
				; CHECK-NEXT: bb:
				; CHECK-NEXT: indirectbr i8* undef, [label [[BB1:%.*]], label %bb2]
				; CHECK: bb1:
				; CHECK-NEXT: store i8* blockaddress(@cant_split2, [[BB2:%.]]), i8* undef, align 4
				; CHECK-NEXT: indirectbr i8* undef, [label [[BB5:%.*]], label %bb1]
				; CHECK: bb2:
				; CHECK-NEXT: indirectbr i8* undef, [label [[BB4:%.*]], label %bb3]
				; CHECK: bb3:
				; CHECK-NEXT: indirectbr i8* undef, [label [[BB1]], label %bb5]
				; CHECK: bb4:
				; CHECK-NEXT: unreachable
				; CHECK: bb5:
				; CHECK-NEXT: ret void
				;
				bb:
				indirectbr i8* undef, [label %bb1, label %bb2]
				bb1:
				store i8* blockaddress(@cant_split2, %bb2), i8** undef, align 4
				indirectbr i8* undef, [label %bb5, label %bb1]
				bb2:
				indirectbr i8* undef, [label %bb4, label %bb3]
				bb3:
				indirectbr i8* undef, [label %bb1, label %bb5]
				bb4:
				unreachable
				bb5:
				ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

[PDSE] Add PDSE.
Needs RevisionPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 93443

include/llvm/LinkAllPasses.h

include/llvm/Transforms/IPO/PassManagerBuilder.h

include/llvm/Transforms/Scalar.h

lib/Transforms/IPO/PassManagerBuilder.cpp

lib/Transforms/Scalar/PDSE.cpp

test/Transforms/DeadStoreElimination/pdse.ll

This is an archive of the discontinued LLVM Phabricator instance.

[PDSE] Add PDSE.Needs RevisionPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 93443

include/llvm/LinkAllPasses.h

include/llvm/Transforms/IPO/PassManagerBuilder.h

include/llvm/Transforms/Scalar.h

lib/Transforms/IPO/PassManagerBuilder.cpp

lib/Transforms/Scalar/PDSE.cpp

test/Transforms/DeadStoreElimination/pdse.ll

[PDSE] Add PDSE.
Needs RevisionPublic