Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -236,7 +236,7 @@
 void initializeModuleDebugInfoPrinterPass(PassRegistry&);
 void initializeModuleSummaryIndexWrapperPassPass(PassRegistry &);
 void initializeNameAnonFunctionPass(PassRegistry &);
-void initializeNaryReassociatePass(PassRegistry&);
+void initializeNaryReassociateLegacyPassPass(PassRegistry &);
 void initializeNoAAPass(PassRegistry&);
 void initializeObjCARCAAWrapperPassPass(PassRegistry&);
 void initializeObjCARCAPElimPass(PassRegistry&);
Index: include/llvm/Transforms/Scalar/NaryReassociate.h
===================================================================
--- include/llvm/Transforms/Scalar/NaryReassociate.h
+++ include/llvm/Transforms/Scalar/NaryReassociate.h
@@ -0,0 +1,174 @@
+//===- NaryReassociate.h - Reassociate n-ary expressions -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass reassociates n-ary add expressions and eliminates the redundancy
+// exposed by the reassociation.
+//
+// A motivating example:
+//
+//   void foo(int a, int b) {
+//     bar(a + b);
+//     bar((a + 2) + b);
+//   }
+//
+// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
+// the above code to
+//
+//   int t = a + b;
+//   bar(t);
+//   bar(t + 2);
+//
+// However, the Reassociate pass is unable to do that because it processes each
+// instruction individually and believes (a + 2) + b is the best form according
+// to its rank system.
+//
+// To address this limitation, NaryReassociate reassociates an expression in a
+// form that reuses existing instructions. As a result, NaryReassociate can
+// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects
+// that (a + b) is computed before.
+//
+// NaryReassociate works as follows. For every instruction in the form of
+// (a + b) + c, it checks whether a + c or b + c is already computed by a
+// dominating instruction. If so, it then reassociates (a + b) + c into
+// (a + c) + b or (b + c) + a and removes the redundancy accordingly. To
+// efficiently look up whether an expression is computed before, we store each
+// instruction seen and its SCEV into an SCEV-to-instruction map.
+//
+// Although the algorithm pattern-matches only ternary additions, it
+// automatically handles many >3-ary expressions by walking through the
+// function in depth-first order. For example, given
+//
+//   (a + c) + d
+//   ((a + b) + c) + d
+//
+// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
+// ((a + c) + b) + d into ((a + c) + d) + b.
+//
+// Finally, the above dominator-based algorithm may need to be run for multiple
+// iterations before emitting optimal code. One source of this need is that we
+// only split an operand when it is used only once. The above algorithm can
+// eliminate an instruction and decrease the usage count of its operands. As a
+// result, an instruction that previously had multiple uses may become a
+// single-use instruction and thus eligible for split consideration. For
+// example,
+//
+//   ac = a + c
+//   ab = a + b
+//   abc = ab + c
+//   ab2 = ab + b
+//   ab2c = ab2 + c
+//
+// In the first iteration, we cannot reassociate abc to ac+b because ab is used
+// twice. However, we can reassociate ab2c to abc+b in the first iteration. As
+// a result, ab2 becomes dead and ab will be used only once in the second
+// iteration.
+//
+// Limitations and TODO items:
+//
+// 1) We only consider n-ary adds and muls for now. This should be extended
+// and generalized.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H
+#define LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  // Glue for old PM.
+  bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_,
+               ScalarEvolution *SE_, TargetLibraryInfo *TLI_,
+               TargetTransformInfo *TTI_);
+
+private:
+  // Runs only one iteration of the dominator-based algorithm. See the header
+  // comments for why we need multiple iterations.
+  bool doOneIteration(Function &F);
+
+  // Reassociates I for better CSE.
+  Instruction *tryReassociate(Instruction *I);
+
+  // Reassociate GEP for better CSE.
+  Instruction *tryReassociateGEP(GetElementPtrInst *GEP);
+  // Try splitting GEP at the I-th index and see whether either part can be
+  // CSE'ed. This is a helper function for tryReassociateGEP.
+  //
+  // \p IndexedType The element type indexed by GEP's I-th index. This is
+  //                equivalent to
+  //                  GEP->getIndexedType(GEP->getPointerOperand(), 0-th index,
+  //                                      ..., i-th index).
+  GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
+                                              unsigned I, Type *IndexedType);
+  // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or
+  // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly.
+  GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
+                                              unsigned I, Value *LHS,
+                                              Value *RHS, Type *IndexedType);
+
+  // Reassociate binary operators for better CSE.
+  Instruction *tryReassociateBinaryOp(BinaryOperator *I);
+
+  // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly
+  // passed.
+  Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS,
+                                      BinaryOperator *I);
+  // Rewrites I to (LHS op RHS) if LHS is computed already.
+  Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS,
+                                       BinaryOperator *I);
+
+  // Tries to match Op1 and Op2 by using V.
+  bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2);
+
+  // Gets SCEV for (LHS op RHS).
+  const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
+                            const SCEV *RHS);
+
+  // Returns the closest dominator of \c Dominatee that computes
+  // \c CandidateExpr. Returns null if not found.
+  Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr,
+                                            Instruction *Dominatee);
+  // GetElementPtrInst implicitly sign-extends an index if the index is shorter
+  // than the pointer size. This function returns whether Index is shorter than
+  // GEP's pointer size, i.e., whether Index needs to be sign-extended in order
+  // to be an index of GEP.
+  bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
+
+  AssumptionCache *AC;
+  const DataLayout *DL;
+  DominatorTree *DT;
+  ScalarEvolution *SE;
+  TargetLibraryInfo *TLI;
+  TargetTransformInfo *TTI;
+  // A lookup table quickly telling which instructions compute the given SCEV.
+  // Note that there can be multiple instructions at different locations
+  // computing to the same SCEV, so we map a SCEV to an instruction list. For
+  // example,
+  //
+  //   if (p1)
+  //     foo(a + b);
+  //   if (p2)
+  //     bar(a + b);
+  DenseMap<const SCEV *, SmallVector<WeakVH, 2>> SeenExprs;
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -100,6 +100,7 @@
 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
+#include "llvm/Transforms/Scalar/NaryReassociate.h"
 #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
 #include "llvm/Transforms/Scalar/Reassociate.h"
 #include "llvm/Transforms/Scalar/SCCP.h"
Index: lib/Passes/PassRegistry.def
===================================================================
--- lib/Passes/PassRegistry.def
+++ lib/Passes/PassRegistry.def
@@ -148,6 +148,7 @@
 FUNCTION_PASS("mem2reg", PromotePass())
 FUNCTION_PASS("memcpyopt", MemCpyOptPass())
 FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass())
+FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
 FUNCTION_PASS("jump-threading", JumpThreadingPass())
 FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
 FUNCTION_PASS("lcssa", LCSSAPass())
Index: lib/Transforms/Scalar/NaryReassociate.cpp
===================================================================
--- lib/Transforms/Scalar/NaryReassociate.cpp
+++ lib/Transforms/Scalar/NaryReassociate.cpp
@@ -76,12 +76,8 @@
 //
 //===----------------------------------------------------------------------===//

-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Scalar/NaryReassociate.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
@@ -94,16 +90,15 @@
 #define DEBUG_TYPE "nary-reassociate"

 namespace {
-class NaryReassociate : public FunctionPass {
+class NaryReassociateLegacyPass : public FunctionPass {
 public:
   static char ID;

-  NaryReassociate(): FunctionPass(ID) {
-    initializeNaryReassociatePass(*PassRegistry::getPassRegistry());
+  NaryReassociateLegacyPass() : FunctionPass(ID) {
+    initializeNaryReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
   }

   bool doInitialization(Module &M) override {
-    DL = &M.getDataLayout();
     return false;
   }
   bool runOnFunction(Function &F) override;
@@ -121,101 +116,66 @@
   }

 private:
-  // Runs only one iteration of the dominator-based algorithm. See the header
-  // comments for why we need multiple iterations.
-  bool doOneIteration(Function &F);
-
-  // Reassociates I for better CSE.
-  Instruction *tryReassociate(Instruction *I);
-
-  // Reassociate GEP for better CSE.
-  Instruction *tryReassociateGEP(GetElementPtrInst *GEP);
-  // Try splitting GEP at the I-th index and see whether either part can be
-  // CSE'ed. This is a helper function for tryReassociateGEP.
-  //
-  // \p IndexedType The element type indexed by GEP's I-th index. This is
-  //                equivalent to
-  //                  GEP->getIndexedType(GEP->getPointerOperand(), 0-th index,
-  //                                      ..., i-th index).
-  GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
-                                              unsigned I, Type *IndexedType);
-  // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or
-  // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly.
-  GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
-                                              unsigned I, Value *LHS,
-                                              Value *RHS, Type *IndexedType);
-
-  // Reassociate binary operators for better CSE.
-  Instruction *tryReassociateBinaryOp(BinaryOperator *I);
-
-  // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly
-  // passed.
-  Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS,
-                                      BinaryOperator *I);
-  // Rewrites I to (LHS op RHS) if LHS is computed already.
-  Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS,
-                                       BinaryOperator *I);
-
-  // Tries to match Op1 and Op2 by using V.
-  bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2);
-
-  // Gets SCEV for (LHS op RHS).
-  const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
-                            const SCEV *RHS);
-
-  // Returns the closest dominator of \c Dominatee that computes
-  // \c CandidateExpr. Returns null if not found.
-  Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr,
-                                            Instruction *Dominatee);
-  // GetElementPtrInst implicitly sign-extends an index if the index is shorter
-  // than the pointer size. This function returns whether Index is shorter than
-  // GEP's pointer size, i.e., whether Index needs to be sign-extended in order
-  // to be an index of GEP.
-  bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
-
-  AssumptionCache *AC;
-  const DataLayout *DL;
-  DominatorTree *DT;
-  ScalarEvolution *SE;
-  TargetLibraryInfo *TLI;
-  TargetTransformInfo *TTI;
-  // A lookup table quickly telling which instructions compute the given SCEV.
-  // Note that there can be multiple instructions at different locations
-  // computing to the same SCEV, so we map a SCEV to an instruction list.
-  // For example,
-  //
-  //   if (p1)
-  //     foo(a + b);
-  //   if (p2)
-  //     bar(a + b);
-  DenseMap<const SCEV *, SmallVector<WeakVH, 2>> SeenExprs;
+  NaryReassociatePass Impl;
 };
 } // anonymous namespace

-char NaryReassociate::ID = 0;
-INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
-                      false, false)
+char NaryReassociateLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(NaryReassociateLegacyPass, "nary-reassociate",
+                      "Nary reassociation", false, false)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
-                    false, false)
+INITIALIZE_PASS_END(NaryReassociateLegacyPass, "nary-reassociate",
+                    "Nary reassociation", false, false)

 FunctionPass *llvm::createNaryReassociatePass() {
-  return new NaryReassociate();
+  return new NaryReassociateLegacyPass();
 }

-bool NaryReassociate::runOnFunction(Function &F) {
+bool NaryReassociateLegacyPass::runOnFunction(Function &F) {
   if (skipFunction(F))
     return false;

-  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+  return Impl.runImpl(F, AC, DT, SE, TLI, TTI);
+}
+
+PreservedAnalyses NaryReassociatePass::run(Function &F,
+                                           FunctionAnalysisManager &AM) {
+  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
+  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+  auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
+  auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+  auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
+
+  bool Changed = runImpl(F, AC, DT, SE, TLI, TTI);
+  if (!Changed)
+    return PreservedAnalyses::all();
+  PreservedAnalyses PA;
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<ScalarEvolutionAnalysis>();
+  PA.preserve<TargetLibraryAnalysis>();
+  return PA;
+}
+
+bool NaryReassociatePass::runImpl(Function &F, AssumptionCache *AC_,
+                                  DominatorTree *DT_, ScalarEvolution *SE_,
+                                  TargetLibraryInfo *TLI_,
+                                  TargetTransformInfo *TTI_) {
+  AC = AC_;
+  DT = DT_;
+  SE = SE_;
+  TLI = TLI_;
+  TTI = TTI_;
+  DL = &F.getParent()->getDataLayout();

   bool Changed = false, ChangedInThisIteration;
   do {
@@ -237,7 +197,7 @@
   }
 }

-bool NaryReassociate::doOneIteration(Function &F) {
+bool NaryReassociatePass::doOneIteration(Function &F) {
   bool Changed = false;
   SeenExprs.clear();
   // Process the basic blocks in pre-order of the dominator tree. This order
@@ -287,7 +247,7 @@
   return Changed;
 }

-Instruction *NaryReassociate::tryReassociate(Instruction *I) {
+Instruction *NaryReassociatePass::tryReassociate(Instruction *I) {
   switch (I->getOpcode()) {
   case Instruction::Add:
   case Instruction::Mul:
@@ -308,7 +268,7 @@
                          Indices) == TargetTransformInfo::TCC_Free;
 }

-Instruction *NaryReassociate::tryReassociateGEP(GetElementPtrInst *GEP) {
+Instruction *NaryReassociatePass::tryReassociateGEP(GetElementPtrInst *GEP) {
   // Not worth reassociating GEP if it is foldable.
   if (isGEPFoldable(GEP, TTI))
     return nullptr;
@@ -324,16 +284,16 @@
   return nullptr;
 }

-bool NaryReassociate::requiresSignExtension(Value *Index,
-                                            GetElementPtrInst *GEP) {
+bool NaryReassociatePass::requiresSignExtension(Value *Index,
+                                                GetElementPtrInst *GEP) {
   unsigned PointerSizeInBits =
       DL->getPointerSizeInBits(GEP->getType()->getPointerAddressSpace());
   return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
 }

 GetElementPtrInst *
-NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
-                                          Type *IndexedType) {
+NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
+                                              unsigned I, Type *IndexedType) {
   Value *IndexToSplit = GEP->getOperand(I + 1);
   if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit)) {
     IndexToSplit = SExt->getOperand(0);
@@ -366,9 +326,10 @@
   return nullptr;
 }

-GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
-    GetElementPtrInst *GEP, unsigned I, Value *LHS, Value *RHS,
-    Type *IndexedType) {
+GetElementPtrInst *
+NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
+                                              unsigned I, Value *LHS,
+                                              Value *RHS, Type *IndexedType) {
   // Look for GEP's closest dominator that has the same SCEV as GEP except that
   // the I-th index is replaced with LHS.
   SmallVector<const SCEV *, 4> IndexExprs;
@@ -437,7 +398,7 @@
   return NewGEP;
 }

-Instruction *NaryReassociate::tryReassociateBinaryOp(BinaryOperator *I) {
+Instruction *NaryReassociatePass::tryReassociateBinaryOp(BinaryOperator *I) {
   Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
   if (auto *NewI = tryReassociateBinaryOp(LHS, RHS, I))
     return NewI;
@@ -446,8 +407,8 @@
   return nullptr;
 }

-Instruction *NaryReassociate::tryReassociateBinaryOp(Value *LHS, Value *RHS,
-                                                     BinaryOperator *I) {
+Instruction *NaryReassociatePass::tryReassociateBinaryOp(Value *LHS, Value *RHS,
+                                                         BinaryOperator *I) {
   Value *A = nullptr, *B = nullptr;
   // To be conservative, we reassociate I only when it is the only user of (A op
   // B).
@@ -470,9 +431,9 @@
   return nullptr;
 }

-Instruction *NaryReassociate::tryReassociatedBinaryOp(const SCEV *LHSExpr,
-                                                      Value *RHS,
-                                                      BinaryOperator *I) {
+Instruction *NaryReassociatePass::tryReassociatedBinaryOp(const SCEV *LHSExpr,
+                                                           Value *RHS,
+                                                           BinaryOperator *I) {
   // Look for the closest dominator LHS of I that computes LHSExpr, and replace
   // I with LHS op RHS.
   auto *LHS = findClosestMatchingDominator(LHSExpr, I);
@@ -494,8 +455,8 @@
   return NewI;
 }

-bool NaryReassociate::matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1,
-                                     Value *&Op2) {
+bool NaryReassociatePass::matchTernaryOp(BinaryOperator *I, Value *V,
+                                         Value *&Op1, Value *&Op2) {
   switch (I->getOpcode()) {
   case Instruction::Add:
     return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
@@ -507,8 +468,9 @@
   return false;
 }

-const SCEV *NaryReassociate::getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
-                                           const SCEV *RHS) {
+const SCEV *NaryReassociatePass::getBinarySCEV(BinaryOperator *I,
+                                               const SCEV *LHS,
+                                               const SCEV *RHS) {
   switch (I->getOpcode()) {
   case Instruction::Add:
     return SE->getAddExpr(LHS, RHS);
@@ -521,8 +483,8 @@
 }

 Instruction *
-NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
-                                              Instruction *Dominatee) {
+NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr,
+                                                  Instruction *Dominatee) {
   auto Pos = SeenExprs.find(CandidateExpr);
   if (Pos == SeenExprs.end())
     return nullptr;
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -67,7 +67,7 @@
   initializeLowerGuardIntrinsicPass(Registry);
   initializeMemCpyOptLegacyPassPass(Registry);
   initializeMergedLoadStoreMotionLegacyPassPass(Registry);
-  initializeNaryReassociatePass(Registry);
+  initializeNaryReassociateLegacyPassPass(Registry);
   initializePartiallyInlineLibCallsLegacyPassPass(Registry);
   initializeReassociateLegacyPassPass(Registry);
   initializeRegToMemPass(Registry);
Index: test/Transforms/NaryReassociate/nary-add.ll
===================================================================
--- test/Transforms/NaryReassociate/nary-add.ll
+++ test/Transforms/NaryReassociate/nary-add.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s

 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"

Index: test/Transforms/NaryReassociate/nary-mul.ll
===================================================================
--- test/Transforms/NaryReassociate/nary-mul.ll
+++ test/Transforms/NaryReassociate/nary-mul.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s

 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"

Index: test/Transforms/NaryReassociate/pr24301.ll
===================================================================
--- test/Transforms/NaryReassociate/pr24301.ll
+++ test/Transforms/NaryReassociate/pr24301.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s

 define i32 @foo(i32 %tmp4) {
 ; CHECK-LABEL: @foo(
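
Illustrative usage sketch (not part of the patch): a minimal IR version of the
motivating example from the NaryReassociate.h header comment, assuming a
hypothetical external @bar. With either the legacy or the new pass-manager
invocation below, the pass is expected to rewrite (a + 2) + b to reuse the
dominating a + b, i.e. turn %a2b into %ab + 2.

  ; RUN: opt < %s -nary-reassociate -S
  ; RUN: opt < %s -passes='nary-reassociate' -S

  declare void @bar(i32)

  define void @foo(i32 %a, i32 %b) {
    ; %ab dominates the later (a + 2) + b, so its SCEV is recorded in SeenExprs.
    %ab = add i32 %a, %b
    call void @bar(i32 %ab)
    ; %a2 has a single use, so the pass may split (a + 2) + b into (a + b) + 2
    ; and reuse %ab for the inner add.
    %a2 = add i32 %a, 2
    %a2b = add i32 %a2, %b
    call void @bar(i32 %a2b)
    ret void
  }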