Index: include/llvm/Transforms/Scalar/ReuseLoopExitValues.h =================================================================== --- include/llvm/Transforms/Scalar/ReuseLoopExitValues.h +++ include/llvm/Transforms/Scalar/ReuseLoopExitValues.h @@ -0,0 +1,31 @@ +//===- IndVarSimplify.h - Induction Variable Simplification -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the interface for the Induction Variable +// Simplification pass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_REUSELOOPEXITVALUES_H +#define LLVM_TRANSFORMS_SCALAR_REUSELOOPEXITVALUES_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Loop; +class LPMUpdater; + +class ReuseLoopExitValuesPass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_REUSELOOPEXITVALUES_H Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -152,6 +152,7 @@ #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" #include "llvm/Transforms/Scalar/Reassociate.h" #include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" +#include "llvm/Transforms/Scalar/ReuseLoopExitValues.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/Transforms/Scalar/Scalarizer.h" @@ -972,9 +973,15 @@ // result too early. OptimizePM.addPass(LoopSinkPass()); - // And finally clean up LCSSA form before generating code. + // And finally clean up LCSSA form before generating code, and reuse + // expressions which compute values as close to their defs as possible. OptimizePM.addPass(InstSimplifyPass()); + // Reuse expressions which compute values as close to their defs as possible; + // this reverses the loop exit value canonical form we use at the IR level to + // use values computed in loops along exits. + OptimizePM.addPass(ReuseLoopExitValuesPass()); + // This hoists/decomposes div/rem ops. It should run after other sink/hoist // passes to avoid re-sinking, but before SimplifyCFG because it can allow // flattening of blocks. Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -247,6 +247,7 @@ FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true})) FUNCTION_PASS("tsan", ThreadSanitizerPass()) FUNCTION_PASS("sancov-func", SanitizerCoveragePass()) +FUNCTION_PASS("reuse-loop-exit-values", ReuseLoopExitValuesPass()); #undef FUNCTION_PASS #ifndef FUNCTION_PASS_WITH_PARAMS Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -58,6 +58,7 @@ Reassociate.cpp Reg2Mem.cpp RewriteStatepointsForGC.cpp + ReuseLoopExitValues.cpp SCCP.cpp SROA.cpp Scalar.cpp Index: lib/Transforms/Scalar/ReuseLoopExitValues.cpp =================================================================== --- lib/Transforms/Scalar/ReuseLoopExitValues.cpp +++ lib/Transforms/Scalar/ReuseLoopExitValues.cpp @@ -0,0 +1,192 @@ +//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// TODO: naming, comments, excess includes, etc... +// TODO: probably should rename to something more generic since there is +// nothing specifical to loop exit values in the code +// TODO: need to generalize to values computable along one (dominating) exit +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/ReuseLoopExitValues.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/OrderedInstructions.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "reuse-loop-exit-values" + +namespace { +bool run(Function &F, + DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE) { + // Construct a mapping of SCEV @ Context to possible llvm::Value*s which + // compute the given expression. Note that we don't worry about which + // expressions are available at which program points, we just build a master + // list. + // TODO: need to remove duplicates w/a set + DenseMap, + SmallVector> AvailableExprs; + auto AddValueAtContext = [&](Value *V, Loop* L) { + if (!SE.isSCEVable(V->getType())) + return; + const SCEV *S = SE.getSCEV(V); + if (isa(S)) + return; + dbgs() << "Adding " << *S << "[" << L << "]"; + dbgs() << *V << "\n"; + + AvailableExprs[{S, L}].push_back(V); + while (L) { + L = L->getParentLoop(); + S = SE.getSCEVAtScope(S, L); + if (isa(S)) + break; + dbgs() << "Adding " << *S << "[" << L << "]"; + dbgs() << *V << "\n"; + AvailableExprs[{S, L}].push_back(V); + } + }; + for (Instruction &I : instructions(F)) { + Loop *L = LI.getLoopFor(I.getParent()); + AddValueAtContext(&I, L); + for (Value *Op : I.operands()) + AddValueAtContext(Op, L); + } + + // TODO: There is an important missing heurstic here. We do not *always* + // want to reverse this if it results in longer live ranges *within* a loop + // which could otherwise be folded. Thought required? + OrderedInstructions OI(&DT); + auto FindNearbyDef = [&](SmallVectorImpl& Defs, Use &U) { + // Find a def which is nearby (i.e. within the block containing the most + // dominating definitions). We don't care about *order wtthin that block*, + // and are simply trying to reduce cross block live ranges. + Instruction *UserI = cast(U.getUser()); + Value *Closest = U.get(); + for (Value *Def : Defs) { + Instruction *DefI = dyn_cast(Def); + if (!DefI) + // All values are equally distant + continue; + if (!OI.dominates(DefI, UserI)) + // Not a valid def for *this* use or in the use BB (ignored for + // efficiency reasons) + continue; + Instruction *ClosestI = dyn_cast(Closest); + if (!ClosestI || + OI.dominates(ClosestI, DefI)) + Closest = Def; + } + return Closest; + }; + + bool Changed = false; + for (Instruction &I : instructions(F)) { + Loop *L = LI.getLoopFor(I.getParent()); + for (Use &U : I.operands()) { + if (!SE.isSCEVable(U.get()->getType())) + continue; + const SCEV *S = SE.getSCEV(U.get()); + if (isa(S)) + continue; + auto &Defs = AvailableExprs[{S, L}]; + assert(!Defs.empty()); + dbgs() << "Considering " << *U.get() << " in " << I << "\n" + << " with defs: \n"; + for (Value *Def : Defs) + dbgs() << " -- " << *Def << "\n"; + Value *NewDef = FindNearbyDef(Defs, U); + if (NewDef == U.get()) + continue; + + // TODO: Kill instructions which become dead after this loop + U.set(NewDef); + Changed = true; + } + } + return Changed; +} +} + +PreservedAnalyses +ReuseLoopExitValuesPass::run(Function &F, FunctionAnalysisManager &AM) { + auto &DT = AM.getResult(F); + auto &LI = AM.getResult(F); + auto &SE = AM.getResult(F); + + dbgs() << "---------------\n"; + dbgs() << F.getName() << "\n"; + dbgs() << "---------------\n"; + + if (!::run(F, DT, LI, SE)) + return PreservedAnalyses::all(); + + auto PA = PreservedAnalyses::none(); + PA.preserveSet(); + return PA; +} +