Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -355,7 +355,7 @@ void initializeSanitizerCoverageModulePass(PassRegistry&); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); void initializeScalarizeMaskedMemIntrinPass(PassRegistry&); -void initializeScalarizerPass(PassRegistry&); +void initializeScalarizerLegacyPassPass(PassRegistry&); void initializeScavengerTestPass(PassRegistry&); void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&); void initializeSeparateConstOffsetFromGEPPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -50,6 +50,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" +#include "llvm/Transforms/Scalar/Scalarizer.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -393,12 +393,6 @@ //===----------------------------------------------------------------------===// // -// ScalarizerPass - Converts vector operations into scalar operations -// -FunctionPass *createScalarizerPass(); - -//===----------------------------------------------------------------------===// -// // SeparateConstOffsetFromGEP - Split GEPs for better CSE // FunctionPass *createSeparateConstOffsetFromGEPPass(bool LowerGEP = false); Index: include/llvm/Transforms/Scalar/Scalarizer.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Scalar/Scalarizer.h @@ -0,0 +1,35 @@ +//===- 
Scalarizer.h --- Scalarize vector operations -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass converts vector operations into scalar operations, in order +/// to expose optimization opportunities on the individual scalar operations. +/// It is mainly intended for targets that do not have vector units, but it +/// may also be useful for revectorizing code to different vector widths. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_SCALARIZER_H +#define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class ScalarizerPass : public PassInfoMixin<ScalarizerPass> {
+public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Create a legacy pass manager instance of the Scalarizer pass +FunctionPass *createScalarizerPass(); + +} + +#endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */ Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -139,6 +139,7 @@ #include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Scalar/Scalarizer.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -205,6 +205,7 @@ FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs())) FUNCTION_PASS("print<scalar-evolution>", ScalarEvolutionPrinterPass(dbgs()))
FUNCTION_PASS("reassociate", ReassociatePass()) +FUNCTION_PASS("scalarizer", ScalarizerPass()) FUNCTION_PASS("sccp", SCCPPass()) FUNCTION_PASS("simplify-cfg", SimplifyCFGPass()) FUNCTION_PASS("sink", SinkingPass()) Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/Scalarizer.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" @@ -43,7 +44,7 @@ initializeDCELegacyPassPass(Registry); initializeDeadInstEliminationPass(Registry); initializeDivRemPairsLegacyPassPass(Registry); - initializeScalarizerPass(Registry); + initializeScalarizerLegacyPassPass(Registry); initializeDSELegacyPassPass(Registry); initializeGuardWideningLegacyPassPass(Registry); initializeLoopGuardWideningLegacyPassPass(Registry); Index: lib/Transforms/Scalar/Scalarizer.cpp =================================================================== --- lib/Transforms/Scalar/Scalarizer.cpp +++ lib/Transforms/Scalar/Scalarizer.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Options.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/Scalarizer.h" #include <cassert> #include <cstdint> #include <iterator> @@ -49,6 +50,13 @@ #define DEBUG_TYPE "scalarizer" +// This is disabled by default because having separate loads and stores +// makes it more likely that the -combiner-alias-analysis limits will be +// reached. +static cl::opt<bool> + ScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden, + cl::desc("Allow the scalarizer pass to scalarize loads and store")); + +namespace { // Used to store the scattered form of a vector.
@@ -152,17 +160,13 @@ uint64_t ElemSize = 0; }; -class Scalarizer : public FunctionPass, - public InstVisitor<Scalarizer, bool> { +class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> { public: - static char ID; - - Scalarizer() : FunctionPass(ID) { - initializeScalarizerPass(*PassRegistry::getPassRegistry()); + ScalarizerVisitor(unsigned ParallelLoopAccessMDKind) + : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind) { } - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; + bool transform(Function &F); // InstVisitor methods. They return true if the instruction was scalarized, // false if nothing changed. @@ -180,16 +184,6 @@ bool visitStoreInst(StoreInst &SI); bool visitCallInst(CallInst &ICI); - static void registerOptions() { - // This is disabled by default because having separate loads and stores - // makes it more likely that the -combiner-alias-analysis limits will be - // reached. - OptionRegistry::registerOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>( - "scalarize-load-store", - "Allow the scalarizer pass to scalarize loads and store", false); - } - private: Scatterer scatter(Instruction *Point, Value *V); void gather(Instruction *Op, const ValueVector &CV); @@ -205,16 +199,28 @@ ScatterMap Scattered; GatherList Gathered; + unsigned ParallelLoopAccessMDKind; - bool ScalarizeLoadStore; }; -} // end anonymous namespace +class ScalarizerLegacyPass : public FunctionPass { +public: + static char ID; + + ScalarizerLegacyPass() : FunctionPass(ID) { + initializeScalarizerLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; +}; -char Scalarizer::ID = 0; +} // end anonymous namespace -INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer", - "Scalarize vector operations", false, false) +char ScalarizerLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer", + "Scalarize vector operations", false, false) +INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer", + "Scalarize vector operations", false, false)
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, ValueVector *cachePtr) @@ -278,17 +284,22 @@ return CV[I]; } -bool Scalarizer::doInitialization(Module &M) { - ParallelLoopAccessMDKind = +bool ScalarizerLegacyPass::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + Module &M = *F.getParent(); + unsigned ParallelLoopAccessMDKind = M.getContext().getMDKindID("llvm.mem.parallel_loop_access"); - ScalarizeLoadStore = - M.getContext().getOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>(); - return false; + ScalarizerVisitor Impl(ParallelLoopAccessMDKind); + return Impl.transform(F); } -bool Scalarizer::runOnFunction(Function &F) { - if (skipFunction(F)) - return false; +FunctionPass *llvm::createScalarizerPass() { + return new ScalarizerLegacyPass(); +} + +bool ScalarizerVisitor::transform(Function &F) { assert(Gathered.empty() && Scattered.empty()); // To ensure we replace gathered components correctly we need to do an ordered @@ -308,7 +319,7 @@ // Return a scattered form of V that can be accessed by Point. V must be a // vector or a pointer to a vector. -Scatterer Scalarizer::scatter(Instruction *Point, Value *V) { +Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) { if (Argument *VArg = dyn_cast<Argument>(V)) { // Put the scattered form of arguments in the entry block, // so that it can be used everywhere. @@ -332,7 +343,7 @@ // deletion of Op and creation of the gathered form to the end of the pass, // so that we can avoid creating the gathered form if all uses of Op are // replaced with uses of CV. -void Scalarizer::gather(Instruction *Op, const ValueVector &CV) { +void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) { // Since we're not deleting Op yet, stub out its operands, so that it // doesn't make anything live unnecessarily. for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) @@ -361,7 +372,7 @@ // Return true if it is safe to transfer the given metadata tag from // vector to scalar instructions.
-bool Scalarizer::canTransferMetadata(unsigned Tag) { +bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) { return (Tag == LLVMContext::MD_tbaa || Tag == LLVMContext::MD_fpmath || Tag == LLVMContext::MD_tbaa_struct @@ -373,7 +384,7 @@ // Transfer metadata from Op to the instructions in CV if it is known // to be safe to do so. -void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) { +void ScalarizerVisitor::transferMetadata(Instruction *Op, const ValueVector &CV) { SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; Op->getAllMetadataOtherThanDebugLoc(MDs); for (unsigned I = 0, E = CV.size(); I != E; ++I) { @@ -389,7 +400,7 @@ // Try to fill in Layout from Ty, returning true on success. Alignment is // the alignment of the vector, or 0 if the ABI default should be used. -bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment, +bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment, VectorLayout &Layout, const DataLayout &DL) { // Make sure we're dealing with a vector. Layout.VecTy = dyn_cast<VectorType>(Ty); @@ -413,7 +424,7 @@ // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name) // to create an instruction like I with operands X and Y and name Name. template <typename Splitter> -bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { +bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) { VectorType *VT = dyn_cast<VectorType>(I.getType()); if (!VT) return false; @@ -446,7 +457,7 @@ /// If a call to a vector typed intrinsic function, split into a scalar call per /// element if possible for the intrinsic.
-bool Scalarizer::splitCall(CallInst &CI) { +bool ScalarizerVisitor::splitCall(CallInst &CI) { VectorType *VT = dyn_cast<VectorType>(CI.getType()); if (!VT) return false; @@ -504,7 +515,7 @@ return true; } -bool Scalarizer::visitSelectInst(SelectInst &SI) { +bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) { VectorType *VT = dyn_cast<VectorType>(SI.getType()); if (!VT) return false; @@ -534,19 +545,19 @@ return true; } -bool Scalarizer::visitICmpInst(ICmpInst &ICI) { +bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) { return splitBinary(ICI, ICmpSplitter(ICI)); } -bool Scalarizer::visitFCmpInst(FCmpInst &FCI) { +bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) { return splitBinary(FCI, FCmpSplitter(FCI)); } -bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) { +bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) { return splitBinary(BO, BinarySplitter(BO)); } -bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { +bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { VectorType *VT = dyn_cast<VectorType>(GEPI.getType()); if (!VT) return false; @@ -592,7 +603,7 @@ return true; } -bool Scalarizer::visitCastInst(CastInst &CI) { +bool ScalarizerVisitor::visitCastInst(CastInst &CI) { VectorType *VT = dyn_cast<VectorType>(CI.getDestTy()); if (!VT) return false; @@ -610,7 +621,7 @@ return true; } -bool Scalarizer::visitBitCastInst(BitCastInst &BCI) { +bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) { VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy()); VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy()); if (!DstVT || !SrcVT) return false; @@ -665,7 +676,7 @@ return true; } -bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) { +bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) { VectorType *VT = dyn_cast<VectorType>(SVI.getType()); if (!VT) return false; @@ -689,7 +700,7 @@ return true; } -bool Scalarizer::visitPHINode(PHINode &PHI) { +bool ScalarizerVisitor::visitPHINode(PHINode &PHI) { VectorType *VT = dyn_cast<VectorType>(PHI.getType()); if (!VT) return
false; @@ -714,7 +725,7 @@ return true; } -bool Scalarizer::visitLoadInst(LoadInst &LI) { +bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) { if (!ScalarizeLoadStore) return false; if (!LI.isSimple()) @@ -738,7 +749,7 @@ return true; } -bool Scalarizer::visitStoreInst(StoreInst &SI) { +bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) { if (!ScalarizeLoadStore) return false; if (!SI.isSimple()) @@ -765,13 +776,13 @@ return true; } -bool Scalarizer::visitCallInst(CallInst &CI) { +bool ScalarizerVisitor::visitCallInst(CallInst &CI) { return splitCall(CI); } // Delete the instructions that we scalarized. If a full vector result // is still needed, recreate it using InsertElements. -bool Scalarizer::finish() { +bool ScalarizerVisitor::finish() { // The presence of data in Gathered or Scattered indicates changes // made to the Function. if (Gathered.empty() && Scattered.empty()) @@ -802,6 +813,11 @@ return true; } -FunctionPass *llvm::createScalarizerPass() { - return new Scalarizer(); +PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) { + Module &M = *F.getParent(); + unsigned ParallelLoopAccessMDKind = + M.getContext().getMDKindID("llvm.mem.parallel_loop_access"); + ScalarizerVisitor Impl(ParallelLoopAccessMDKind); + (void)Impl.transform(F); + return PreservedAnalyses::none(); } Index: test/Transforms/Scalarizer/basic.ll =================================================================== --- test/Transforms/Scalarizer/basic.ll +++ test/Transforms/Scalarizer/basic.ll @@ -1,4 +1,5 @@ ; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck %s +; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" declare <4 x float> @ext(<4 x float>) Index: test/Transforms/Scalarizer/cache-bug.ll 
=================================================================== --- test/Transforms/Scalarizer/cache-bug.ll +++ test/Transforms/Scalarizer/cache-bug.ll @@ -1,4 +1,5 @@ ; RUN: opt -scalarizer -S < %s | FileCheck %s +; RUN: opt -passes='function(scalarizer)' -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" Index: test/Transforms/Scalarizer/crash-bug.ll =================================================================== --- test/Transforms/Scalarizer/crash-bug.ll +++ test/Transforms/Scalarizer/crash-bug.ll @@ -1,4 +1,5 @@ ; RUN: opt %s -scalarizer -S -o - | FileCheck %s +; RUN: opt %s -passes='function(scalarizer)' -S -o - | FileCheck %s ; Don't crash Index: test/Transforms/Scalarizer/dbginfo.ll =================================================================== --- test/Transforms/Scalarizer/dbginfo.ll +++ test/Transforms/Scalarizer/dbginfo.ll @@ -1,4 +1,5 @@ ; RUN: opt %s -scalarizer -scalarize-load-store -S | FileCheck %s +; RUN: opt %s -passes='function(scalarizer)' -scalarize-load-store -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" ; Function Attrs: nounwind uwtable Index: test/Transforms/Scalarizer/dbgloc-bug.ll =================================================================== --- test/Transforms/Scalarizer/dbgloc-bug.ll +++ test/Transforms/Scalarizer/dbgloc-bug.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -march=x86 -scalarizer %s | FileCheck %s +; RUN: opt -S -march=x86 -passes='function(scalarizer)' %s | FileCheck %s ; Reproducer for pr27938 ; https://llvm.org/bugs/show_bug.cgi?id=27938 Index: test/Transforms/Scalarizer/intrinsics.ll =================================================================== --- test/Transforms/Scalarizer/intrinsics.ll +++ test/Transforms/Scalarizer/intrinsics.ll 
@@ -1,4 +1,5 @@ ; RUN: opt -S -scalarizer %s | FileCheck %s +; RUN: opt -S -passes='function(scalarizer)' %s | FileCheck %s ; Unary fp declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) Index: test/Transforms/Scalarizer/order-bug.ll =================================================================== --- test/Transforms/Scalarizer/order-bug.ll +++ test/Transforms/Scalarizer/order-bug.ll @@ -1,4 +1,5 @@ ; RUN: opt %s -scalarizer -S -o - | FileCheck %s +; RUN: opt %s -passes='function(scalarizer)' -S -o - | FileCheck %s ; This input caused the scalarizer to replace & erase gathered results when ; future gathered results depended on them being alive Index: test/Transforms/Scalarizer/phi-bug.ll =================================================================== --- test/Transforms/Scalarizer/phi-bug.ll +++ test/Transforms/Scalarizer/phi-bug.ll @@ -1,4 +1,5 @@ ; RUN: opt %s -scalarizer -verify -S -o - | FileCheck %s +; RUN: opt %s -passes='function(scalarizer,verify)' -S -o - | FileCheck %s define void @f3() local_unnamed_addr { bb1: Index: test/Transforms/Scalarizer/store-bug.ll =================================================================== --- test/Transforms/Scalarizer/store-bug.ll +++ test/Transforms/Scalarizer/store-bug.ll @@ -1,4 +1,5 @@ ; RUN: opt -scalarizer -scalarize-load-store -S < %s | FileCheck %s +; RUN: opt -passes='function(scalarizer)' -scalarize-load-store -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" ; This input caused the scalarizer not to clear cached results Index: test/Transforms/Scalarizer/vector-gep.ll =================================================================== --- test/Transforms/Scalarizer/vector-gep.ll +++ test/Transforms/Scalarizer/vector-gep.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -scalarizer %s | FileCheck %s +; RUN: opt -S -passes='function(scalarizer)' %s | FileCheck %s ; Check that the 
scalarizer can handle vector GEPs with scalar indices