diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -391,7 +391,7 @@ void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&); void initializeModuleSanitizerCoverageLegacyPassPass(PassRegistry &); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); -void initializeScalarizeMaskedMemIntrinPass(PassRegistry&); +void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &); void initializeScalarizerLegacyPassPass(PassRegistry&); void initializeScavengerTestPass(PassRegistry&); void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -224,7 +224,7 @@ (void) llvm::createMustBeExecutedContextPrinter(); (void) llvm::createFloat2IntPass(); (void) llvm::createEliminateAvailableExternallyPass(); - (void) llvm::createScalarizeMaskedMemIntrinPass(); + (void)llvm::createScalarizeMaskedMemIntrinLegacyPass(); (void) llvm::createWarnMissedTransformationsPass(); (void) llvm::createHardwareLoopsPass(); (void) llvm::createInjectTLIMappingsLegacyPass(); diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -552,7 +552,7 @@ // createScalarizeMaskedMemIntrinPass - Replace masked load, store, gather // and scatter intrinsics with scalar code when target doesn't support them. 
// -FunctionPass *createScalarizeMaskedMemIntrinPass(); +FunctionPass *createScalarizeMaskedMemIntrinLegacyPass(); } // End llvm namespace #endif diff --git a/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h b/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h @@ -0,0 +1,29 @@ +//===- ScalarizeMaskedMemIntrin.h - Scalarize unsupported masked mem ----===// +// intrinsics +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass replaces masked memory intrinsics - when unsupported by the target +// - with a chain of basic blocks, that deal with the elements one-by-one if the +// appropriate mask bit is set. +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_TRANSFORMS_SCALAR_SCALARIZE_MASKED_MEMINTRIN_H +#define LLVM_TRANSFORMS_SCALAR_SCALARIZE_MASKED_MEMINTRIN_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct ScalarizeMaskedMemIntrinPass + : public PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // end namespace llvm + +#endif diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -710,7 +710,7 @@ // Add scalarization of target's unsupported masked memory intrinsics pass. // the unsupported intrinsic will be replaced with a chain of basic blocks, // that stores/loads element one-by-one if the appropriate mask bit is set. 
- addPass(createScalarizeMaskedMemIntrinPass()); + addPass(createScalarizeMaskedMemIntrinLegacyPass()); // Expand reduction intrinsics into shuffle sequences if the target wants to. // Allow disabling it for testing purposes. diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -194,6 +194,7 @@ #include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" #include "llvm/Transforms/Scalar/Scalarizer.h" #include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -296,6 +296,7 @@ FUNCTION_PASS("reassociate", ReassociatePass()) FUNCTION_PASS("redundant-dbg-inst-elim", RedundantDbgInstEliminationPass()) FUNCTION_PASS("reg2mem", RegToMemPass()) +FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("scalarizer", ScalarizerPass()) FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass()) FUNCTION_PASS("sccp", SCCPPass()) diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -95,7 +95,7 @@ initializeRedundantDbgInstEliminationPass(Registry); initializeRegToMemLegacyPass(Registry); initializeRewriteStatepointsForGCLegacyPassPass(Registry); - initializeScalarizeMaskedMemIntrinPass(Registry); + initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); initializeSCCPLegacyPassPass(Registry); initializeSROALegacyPassPass(Registry); initializeCFGSimplifyPassPass(Registry); diff --git 
a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp --- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp +++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" @@ -41,12 +42,13 @@ namespace { -class ScalarizeMaskedMemIntrin : public FunctionPass { +class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid - explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID) { - initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry()); + explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) { + initializeScalarizeMaskedMemIntrinLegacyPassPass( + *PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -68,13 +70,13 @@ const TargetTransformInfo &TTI, const DataLayout &DL); -char ScalarizeMaskedMemIntrin::ID = 0; +char ScalarizeMaskedMemIntrinLegacyPass::ID = 0; -INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE, +INITIALIZE_PASS(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE, "Scalarize unsupported masked memory intrinsics", false, false) -FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() { - return new ScalarizeMaskedMemIntrin(); +FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() { + return new ScalarizeMaskedMemIntrinLegacyPass(); } static bool isConstantIntVector(Value *Mask) { @@ -821,13 +823,10 @@ ModifiedDT = true; } -bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) { +static bool runImpl(Function &F, const TargetTransformInfo &TTI) { bool EverMadeChange = false; - - auto &TTI = getAnalysis().getTTI(F); - auto &DL = F.getParent()->getDataLayout(); - bool MadeChange = 
true; + auto &DL = F.getParent()->getDataLayout(); while (MadeChange) { MadeChange = false; for (Function::iterator I = F.begin(); I != F.end();) { @@ -842,10 +841,24 @@ EverMadeChange |= MadeChange; } - return EverMadeChange; } +bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) { + auto &TTI = getAnalysis().getTTI(F); + return runImpl(F, TTI); +} + +PreservedAnalyses +ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) { + auto &TTI = AM.getResult(F); + if (!runImpl(F, TTI)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + return PA; +} + static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL) { diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -5,6 +5,7 @@ ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq -code-model=large < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_LARGE ; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX_32 ; RUN: opt -mtriple=x86_64-apple-darwin -scalarize-masked-mem-intrin -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR +; RUN: opt -mtriple=x86_64-apple-darwin -passes=scalarize-masked-mem-intrin -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR ; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu -mcpu=skx < %s -o /dev/null @glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16 diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -319,7 +319,7 @@ 
initializeConstantHoistingLegacyPassPass(*Registry); initializeScalarOpts(*Registry); initializeVectorization(*Registry); - initializeScalarizeMaskedMemIntrinPass(*Registry); + initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry); initializeExpandReductionsPass(*Registry); initializeHardwareLoopsPass(*Registry); initializeTransformUtils(*Registry); diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -467,18 +467,25 @@ "mips-", "lanai-", "hexagon-", "bpf-", "avr-", "thumb2-", "arm-", "si-", "gcn-", "amdgpu-", "aarch64-", "amdgcn-"}; std::vector PassNameContain = {"ehprepare"}; - std::vector PassNameExact = { - "safe-stack", "cost-model", - "codegenprepare", "interleaved-load-combine", - "unreachableblockelim", "scalarize-masked-mem-intrin", - "verify-safepoint-ir", "divergence", - "infer-address-spaces", "atomic-expand", - "hardware-loops", "type-promotion", - "mve-tail-predication", "interleaved-access", - "global-merge", "pre-isel-intrinsic-lowering", - "expand-reductions", "indirectbr-expand", - "generic-to-nvvm", "expandmemcmp", - "loop-reduce"}; + std::vector PassNameExact = {"safe-stack", + "cost-model", + "codegenprepare", + "interleaved-load-combine", + "unreachableblockelim", + "verify-safepoint-ir", + "divergence", + "infer-address-spaces", + "atomic-expand", + "hardware-loops", + "type-promotion", + "mve-tail-predication", + "interleaved-access", + "global-merge", + "pre-isel-intrinsic-lowering", + "expand-reductions", + "indirectbr-expand", + "generic-to-nvvm", + "expandmemcmp"}; for (const auto &P : PassNamePrefix) if (Pass.startswith(P)) return true; @@ -534,7 +541,7 @@ // For codegen passes, only passes that do IR to IR transformation are // supported. 
initializeExpandMemCmpPassPass(Registry); - initializeScalarizeMaskedMemIntrinPass(Registry); + initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); initializeCodeGenPreparePass(Registry); initializeAtomicExpandPass(Registry); initializeRewriteSymbolsLegacyPassPass(Registry);