Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -365,7 +365,7 @@ void initializeSanitizerCoverageModulePass(PassRegistry&); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); void initializeScalarizeMaskedMemIntrinPass(PassRegistry&); -void initializeScalarizerPass(PassRegistry&); +void initializeScalarizerLegacyPassPass(PassRegistry&); void initializeScavengerTestPass(PassRegistry&); void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&); void initializeSeparateConstOffsetFromGEPPass(PassRegistry&); Index: include/llvm/Transforms/Scalar/Scalarizer.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Scalar/Scalarizer.h @@ -0,0 +1,30 @@ +//===- Scalarizer.h --- Scalarize vector operations -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass converts vector operations into scalar operations, in order +// to expose optimization opportunities on the individual scalar operations. +// It is mainly intended for targets that do not have vector units, but it +// may also be useful for revectorizing code to different vector widths. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_SCALARIZER_H +#define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class ScalarizerPass : public PassInfoMixin<ScalarizerPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} + +#endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */ Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -139,6 +139,7 @@ #include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Scalar/Scalarizer.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -205,6 +205,7 @@ FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs())) FUNCTION_PASS("print<scalar-evolution>", ScalarEvolutionPrinterPass(dbgs())) FUNCTION_PASS("reassociate", ReassociatePass()) +FUNCTION_PASS("scalarizer", ScalarizerPass()) FUNCTION_PASS("sccp", SCCPPass()) FUNCTION_PASS("simplify-cfg", SimplifyCFGPass()) FUNCTION_PASS("sink", SinkingPass()) Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -43,7 +43,7 @@ initializeDCELegacyPassPass(Registry); initializeDeadInstEliminationPass(Registry); initializeDivRemPairsLegacyPassPass(Registry); - initializeScalarizerPass(Registry); + initializeScalarizerLegacyPassPass(Registry); initializeDSELegacyPassPass(Registry); initializeGuardWideningLegacyPassPass(Registry); 
initializeLoopGuardWideningLegacyPassPass(Registry); Index: lib/Transforms/Scalar/Scalarizer.cpp =================================================================== --- lib/Transforms/Scalar/Scalarizer.cpp +++ lib/Transforms/Scalar/Scalarizer.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Options.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/Scalarizer.h" #include <cassert> #include <cstdint> #include <iterator> @@ -50,6 +51,13 @@ #define DEBUG_TYPE "scalarizer" +// This is disabled by default because having separate loads and stores +// makes it more likely that the -combiner-alias-analysis limits will be +// reached. +static cl::opt<bool> + ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden, + cl::desc("Allow the scalarizer pass to scalarize loads and store")); + namespace { // Used to store the scattered form of a vector. @@ -153,22 +161,10 @@ uint64_t ElemSize = 0; }; -class Scalarizer : public FunctionPass, - public InstVisitor<Scalarizer, bool> { +class Scalarizer : public InstVisitor<Scalarizer, bool> { public: - static char ID; - - Scalarizer() : FunctionPass(ID) { - initializeScalarizerPass(*PassRegistry::getPassRegistry()); - } - - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage& AU) const override { - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - } + bool doTransform(Function &F, DominatorTree *DT, unsigned ParallelLoopAccessMDKind); // InstVisitor methods. They return true if the instruction was scalarized, // false if nothing changed. @@ -186,16 +182,6 @@ bool visitStoreInst(StoreInst &SI); bool visitCallInst(CallInst &ICI); - static void registerOptions() { - // This is disabled by default because having separate loads and stores - // makes it more likely that the -combiner-alias-analysis limits will be - // reached. 
- OptionRegistry::registerOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>( - "scalarize-load-store", - "Allow the scalarizer pass to scalarize loads and store", false); - } - private: Scatterer scatter(Instruction *Point, Value *V); void gather(Instruction *Op, const ValueVector &CV); @@ -211,20 +197,34 @@ ScatterMap Scattered; GatherList Gathered; - unsigned ParallelLoopAccessMDKind; - bool ScalarizeLoadStore; + unsigned ParallelLoopAccessMDKind; DominatorTree *DT; }; -} // end anonymous namespace +class ScalarizerLegacyPass : public FunctionPass { +public: + static char ID; + + ScalarizerLegacyPass() : FunctionPass(ID) { + initializeScalarizerLegacyPassPass(*PassRegistry::getPassRegistry()); + } -char Scalarizer::ID = 0; + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage& AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + } +}; -INITIALIZE_PASS_WITH_OPTIONS_BEGIN(Scalarizer, "scalarizer", - "Scalarize vector operations", false, false) +} // end anonymous namespace + +char ScalarizerLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer", + "Scalarize vector operations", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(Scalarizer, "scalarizer", +INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer", "Scalarize vector operations", false, false) Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, @@ -289,22 +289,29 @@ return CV[I]; } -bool Scalarizer::doInitialization(Module &M) { - ParallelLoopAccessMDKind = +bool ScalarizerLegacyPass::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + Scalarizer Impl; + Module &M = *F.getParent(); + unsigned ParallelLoopAccessMDKind = M.getContext().getMDKindID("llvm.mem.parallel_loop_access"); - ScalarizeLoadStore = - M.getContext().getOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>(); - return false; + DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + return Impl.doTransform(F, DT, ParallelLoopAccessMDKind); } -bool Scalarizer::runOnFunction(Function &F) { - if 
(skipFunction(F)) - return false; +FunctionPass *llvm::createScalarizerPass() { + return new ScalarizerLegacyPass(); +} - DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); +bool Scalarizer::doTransform(Function &F, DominatorTree *DT, unsigned ParallelLoopAccessMDKind) { assert(Gathered.empty() && Scattered.empty()); + this->DT = DT; + this->ParallelLoopAccessMDKind = ParallelLoopAccessMDKind; + // To ensure we replace gathered components correctly we need to do an ordered // traversal of the basic blocks in the function. ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock()); @@ -839,6 +846,14 @@ return true; } -FunctionPass *llvm::createScalarizerPass() { - return new Scalarizer(); +PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) { + Scalarizer Impl; + Module &M = *F.getParent(); + unsigned ParallelLoopAccessMDKind = + M.getContext().getMDKindID("llvm.mem.parallel_loop_access"); + DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); + bool Modified = Impl.doTransform(F, DT, ParallelLoopAccessMDKind); + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + return Modified ? PA : PreservedAnalyses::all(); } Index: test/Transforms/Scalarizer/basic.ll =================================================================== --- test/Transforms/Scalarizer/basic.ll +++ test/Transforms/Scalarizer/basic.ll @@ -1,4 +1,5 @@ ; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck %s +; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" declare <4 x float> @ext(<4 x float>)