Index: llvm/trunk/include/llvm/InitializePasses.h =================================================================== --- llvm/trunk/include/llvm/InitializePasses.h +++ llvm/trunk/include/llvm/InitializePasses.h @@ -279,7 +279,7 @@ void initializeMemorySSAWrapperPassPass(PassRegistry&); void initializeMemorySanitizerLegacyPassPass(PassRegistry&); void initializeMergeFunctionsPass(PassRegistry&); -void initializeMergeICmpsPass(PassRegistry&); +void initializeMergeICmpsLegacyPassPass(PassRegistry &); void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&); void initializeMetaRenamerPass(PassRegistry&); void initializeModuleDebugInfoPrinterPass(PassRegistry&); Index: llvm/trunk/include/llvm/LinkAllPasses.h =================================================================== --- llvm/trunk/include/llvm/LinkAllPasses.h +++ llvm/trunk/include/llvm/LinkAllPasses.h @@ -191,7 +191,7 @@ (void) llvm::createPostOrderFunctionAttrsLegacyPass(); (void) llvm::createReversePostOrderFunctionAttrsPass(); (void) llvm::createMergeFunctionsPass(); - (void) llvm::createMergeICmpsPass(); + (void) llvm::createMergeICmpsLegacyPass(); (void) llvm::createExpandMemCmpPass(); std::string buf; llvm::raw_string_ostream os(buf); Index: llvm/trunk/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar.h +++ llvm/trunk/include/llvm/Transforms/Scalar.h @@ -371,7 +371,7 @@ // // MergeICmps - Merge integer comparison chains into a memcmp // -Pass *createMergeICmpsPass(); +Pass *createMergeICmpsLegacyPass(); //===----------------------------------------------------------------------===// // Index: llvm/trunk/include/llvm/Transforms/Scalar/MergeICmps.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/MergeICmps.h +++ llvm/trunk/include/llvm/Transforms/Scalar/MergeICmps.h @@ -0,0 +1,25 @@ +//===- MergeICmps.h -----------------------------*- 
C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_MERGEICMPS_H +#define LLVM_TRANSFORMS_SCALAR_MERGEICMPS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Function; + +struct MergeICmpsPass + : PassInfoMixin<MergeICmpsPass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_MERGEICMPS_H Index: llvm/trunk/lib/CodeGen/TargetPassConfig.cpp =================================================================== --- llvm/trunk/lib/CodeGen/TargetPassConfig.cpp +++ llvm/trunk/lib/CodeGen/TargetPassConfig.cpp @@ -646,7 +646,7 @@ // into optimally-sized loads and compares. The transforms are enabled by a // target lowering hook. 
if (!DisableMergeICmps) - addPass(createMergeICmpsPass()); + addPass(createMergeICmpsLegacyPass()); addPass(createExpandMemCmpPass()); } Index: llvm/trunk/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/trunk/lib/Passes/PassBuilder.cpp +++ llvm/trunk/lib/Passes/PassBuilder.cpp @@ -142,6 +142,7 @@ #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h" #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" +#include "llvm/Transforms/Scalar/MergeICmps.h" #include "llvm/Transforms/Scalar/NaryReassociate.h" #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" Index: llvm/trunk/lib/Passes/PassRegistry.def =================================================================== --- llvm/trunk/lib/Passes/PassRegistry.def +++ llvm/trunk/lib/Passes/PassRegistry.def @@ -190,6 +190,7 @@ FUNCTION_PASS("lowerinvoke", LowerInvokePass()) FUNCTION_PASS("mem2reg", PromotePass()) FUNCTION_PASS("memcpyopt", MemCpyOptPass()) +FUNCTION_PASS("mergeicmps", MergeICmpsPass()) FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass()) FUNCTION_PASS("nary-reassociate", NaryReassociatePass()) FUNCTION_PASS("newgvn", NewGVNPass()) Index: llvm/trunk/lib/Transforms/Scalar/MergeICmps.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/MergeICmps.cpp +++ llvm/trunk/lib/Transforms/Scalar/MergeICmps.cpp @@ -41,6 +41,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Scalar/MergeICmps.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" @@ -214,19 +215,19 @@ // Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp // instructions in the block. 
- bool canSplit(AliasAnalysis *AA) const; + bool canSplit(AliasAnalysis &AA) const; // Return true if this all the relevant instructions in the BCE-cmp-block can // be sunk below this instruction. By doing this, we know we can separate the // BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the // block. bool canSinkBCECmpInst(const Instruction *, DenseSet &, - AliasAnalysis *AA) const; + AliasAnalysis &AA) const; // We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block // instructions. Split the old block and move all non-BCE-cmp-insts into the // new parent block. - void split(BasicBlock *NewParent, AliasAnalysis *AA) const; + void split(BasicBlock *NewParent, AliasAnalysis &AA) const; // The basic block where this comparison happens. BasicBlock *BB = nullptr; @@ -245,7 +246,7 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst, DenseSet &BlockInsts, - AliasAnalysis *AA) const { + AliasAnalysis &AA) const { // If this instruction has side effects and its in middle of the BCE cmp block // instructions, then bail for now. if (Inst->mayHaveSideEffects()) { @@ -255,9 +256,9 @@ // Disallow stores that might alias the BCE operands MemoryLocation LLoc = MemoryLocation::get(Lhs_.LoadI); MemoryLocation RLoc = MemoryLocation::get(Rhs_.LoadI); - if (isModSet(AA->getModRefInfo(Inst, LLoc)) || - isModSet(AA->getModRefInfo(Inst, RLoc))) - return false; + if (isModSet(AA.getModRefInfo(Inst, LLoc)) || + isModSet(AA.getModRefInfo(Inst, RLoc))) + return false; } // Make sure this instruction does not use any of the BCE cmp block // instructions as operand. 
@@ -268,7 +269,7 @@ return true; } -void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const { +void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis &AA) const { DenseSet BlockInsts( {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI}); llvm::SmallVector OtherInsts; @@ -288,7 +289,7 @@ } } -bool BCECmpBlock::canSplit(AliasAnalysis *AA) const { +bool BCECmpBlock::canSplit(AliasAnalysis &AA) const { DenseSet BlockInsts( {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI}); for (Instruction &Inst : *BB) { @@ -404,16 +405,16 @@ // A chain of comparisons. class BCECmpChain { public: - BCECmpChain(const std::vector &Blocks, PHINode &Phi, - AliasAnalysis *AA); + BCECmpChain(const std::vector &Blocks, PHINode &Phi, + AliasAnalysis &AA); - int size() const { return Comparisons_.size(); } + int size() const { return Comparisons_.size(); } #ifdef MERGEICMPS_DOT_ON void dump() const; #endif // MERGEICMPS_DOT_ON - bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA, + bool simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA, DomTreeUpdater &DTU); private: @@ -432,7 +433,7 @@ }; BCECmpChain::BCECmpChain(const std::vector &Blocks, PHINode &Phi, - AliasAnalysis *AA) + AliasAnalysis &AA) : Phi_(Phi) { assert(!Blocks.empty() && "a chain should have at least one block"); // Now look inside blocks to check for BCE comparisons. 
@@ -604,9 +605,8 @@ static BasicBlock *mergeComparisons(ArrayRef Comparisons, BasicBlock *const InsertBefore, BasicBlock *const NextCmpBlock, - PHINode &Phi, - const TargetLibraryInfo *const TLI, - AliasAnalysis *AA, DomTreeUpdater &DTU) { + PHINode &Phi, const TargetLibraryInfo &TLI, + AliasAnalysis &AA, DomTreeUpdater &DTU) { assert(!Comparisons.empty() && "merging zero comparisons"); LLVMContext &Context = NextCmpBlock->getContext(); const BCECmpBlock &FirstCmp = Comparisons[0]; @@ -652,7 +652,7 @@ Value *const MemCmpCall = emitMemCmp( Lhs, Rhs, ConstantInt::get(DL.getIntPtrType(Context), TotalSizeBits / 8), Builder, - DL, TLI); + DL, &TLI); IsEqual = Builder.CreateICmpEQ( MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0)); } @@ -674,8 +674,8 @@ return BB; } -bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI, - AliasAnalysis *AA, DomTreeUpdater &DTU) { +bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA, + DomTreeUpdater &DTU) { assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain"); // First pass to check if there is at least one merge. If not, we don't do // anything and we keep analysis passes intact. @@ -694,9 +694,9 @@ // Effectively merge blocks. We go in the reverse direction from the phi block // so that the next block is always available to branch to. 
- const auto mergeRange = [this, TLI, AA, &DTU](int I, int Num, - BasicBlock *InsertBefore, - BasicBlock *Next) { + const auto mergeRange = [this, &TLI, &AA, &DTU](int I, int Num, + BasicBlock *InsertBefore, + BasicBlock *Next) { return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num), InsertBefore, Next, Phi_, TLI, AA, DTU); }; @@ -790,8 +790,8 @@ return Blocks; } -bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI, - AliasAnalysis *AA, DomTreeUpdater &DTU) { +bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, AliasAnalysis &AA, + DomTreeUpdater &DTU) { LLVM_DEBUG(dbgs() << "processPhi()\n"); if (Phi.getNumIncomingValues() <= 1) { LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n"); @@ -859,12 +859,40 @@ return CmpChain.simplify(TLI, AA, DTU); } -class MergeICmps : public FunctionPass { - public: +static bool runImpl(Function &F, const TargetLibraryInfo &TLI, + const TargetTransformInfo &TTI, AliasAnalysis &AA, + DominatorTree *DT) { + LLVM_DEBUG(dbgs() << "MergeICmpsLegacyPass: " << F.getName() << "\n"); + + // We only try merging comparisons if the target wants to expand memcmp later. + // The rationale is to avoid turning small chains into memcmp calls. + if (!TTI.enableMemCmpExpansion(true)) + return false; + + // If we don't have memcmp available we can't emit calls to it. + if (!TLI.has(LibFunc_memcmp)) + return false; + + DomTreeUpdater DTU(DT, /*PostDominatorTree*/ nullptr, + DomTreeUpdater::UpdateStrategy::Eager); + + bool MadeChange = false; + + for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) { + // A Phi operation is always first in a basic block. 
+ if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin())) + MadeChange |= processPhi(*Phi, TLI, AA, DTU); + } + + return MadeChange; +} + +class MergeICmpsLegacyPass : public FunctionPass { +public: static char ID; - MergeICmps() : FunctionPass(ID) { - initializeMergeICmpsPass(*PassRegistry::getPassRegistry()); + MergeICmpsLegacyPass() : FunctionPass(ID) { + initializeMergeICmpsLegacyPassPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override { @@ -874,12 +902,8 @@ // MergeICmps does not need the DominatorTree, but we update it if it's // already available. auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DomTreeUpdater DTU(DTWP ? &DTWP->getDomTree() : nullptr, - /*PostDominatorTree*/ nullptr, - DomTreeUpdater::UpdateStrategy::Eager); - AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - auto PA = runImpl(F, &TLI, &TTI, AA, DTU); - return !PA.areAllPreserved(); + auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + return runImpl(F, TLI, TTI, AA, DTWP ? &DTWP->getDomTree() : nullptr); } private: @@ -890,50 +914,32 @@ AU.addPreserved<GlobalsAAWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); } - - PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, AliasAnalysis *AA, - DomTreeUpdater &DTU); }; -PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, - AliasAnalysis *AA, DomTreeUpdater &DTU) { - LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n"); - - // We only try merging comparisons if the target wants to expand memcmp later. - // The rationale is to avoid turning small chains into memcmp calls. - if (!TTI->enableMemCmpExpansion(true)) return PreservedAnalyses::all(); +} // namespace - // If we don't have memcmp avaiable we can't emit calls to it. 
- if (!TLI->has(LibFunc_memcmp)) - return PreservedAnalyses::all(); +char MergeICmpsLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(MergeICmpsLegacyPass, "mergeicmps", + "Merge contiguous icmps into a memcmp", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(MergeICmpsLegacyPass, "mergeicmps", + "Merge contiguous icmps into a memcmp", false, false) - bool MadeChange = false; +Pass *llvm::createMergeICmpsLegacyPass() { return new MergeICmpsLegacyPass(); } - for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) { - // A Phi operation is always first in a basic block. - if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin())) - MadeChange |= processPhi(*Phi, TLI, AA, DTU); - } - - if (!MadeChange) +PreservedAnalyses MergeICmpsPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + auto &TTI = AM.getResult<TargetIRAnalysis>(F); + auto &AA = AM.getResult<AAManager>(F); + auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); + const bool MadeChanges = runImpl(F, TLI, TTI, AA, DT); + if (!MadeChanges) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve<GlobalsAA>(); PA.preserve<DominatorTreeAnalysis>(); return PA; } - -} // namespace - -char MergeICmps::ID = 0; -INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps", - "Merge contiguous icmps into a memcmp", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(MergeICmps, "mergeicmps", - "Merge contiguous icmps into a memcmp", false, false) - -Pass *llvm::createMergeICmpsPass() { return new MergeICmps(); } Index: llvm/trunk/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/Scalar.cpp +++ llvm/trunk/lib/Transforms/Scalar/Scalar.cpp @@ -83,7 +83,7 @@ 
initializeLowerGuardIntrinsicLegacyPassPass(Registry); initializeLowerWidenableConditionLegacyPassPass(Registry); initializeMemCpyOptLegacyPassPass(Registry); - initializeMergeICmpsPass(Registry); + initializeMergeICmpsLegacyPassPass(Registry); initializeMergedLoadStoreMotionLegacyPassPass(Registry); initializeNaryReassociateLegacyPassPass(Registry); initializePartiallyInlineLibCallsLegacyPassPass(Registry); Index: llvm/trunk/test/Transforms/MergeICmps/X86/pair-int32-int32.ll =================================================================== --- llvm/trunk/test/Transforms/MergeICmps/X86/pair-int32-int32.ll +++ llvm/trunk/test/Transforms/MergeICmps/X86/pair-int32-int32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 +; RUN: opt < %s -passes='require<domtree>,mergeicmps,verify<domtree>' -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 ; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S -disable-simplify-libcalls | FileCheck %s --check-prefix=X86-NOBUILTIN %S = type { i32, i32 }