diff --git a/clang/test/CodeGen/thinlto-loop-vectorize-pm.c b/clang/test/CodeGen/thinlto-loop-vectorize-pm.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/thinlto-loop-vectorize-pm.c @@ -0,0 +1,44 @@ +// REQUIRES: x86-registered-target +// RUN: %clang_cc1 -o %t.o -O2 -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s +// RUN: llvm-lto -thinlto -o %t %t.o + +// Test to ensure the loop vectorize codegen option is passed down to the +// ThinLTO backend. -vectorize-loops is a cc1 option and will be added +// automatically when O2/O3/Os is available for clang. Also check that +// "-mllvm -vectorize-loops=false" will disable loop vectorization, overriding +// the cc1 option. +// +// Check both the new and old PMs. +// +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-LPV +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=O2-LPV +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-NOLPV +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=O2-NOLPV +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm 
-force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-LPV +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=O0-LPV +// O2-LPV: = !{!"llvm.loop.isvectorized", i32 1} +// O2-NOLPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} +// O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} + +// Test to ensure the loop interleave codegen option is passed down to the +// ThinLTO backend. The internal loop interleave codegen option will be +// enabled automatically when O2/O3 is available for clang. Also check that +// "-mllvm -interleave-loops=false" will disable the interleaving, overriding +// the cc1 option. +// +// Check the new PM only (all RUN lines use -fexperimental-new-pass-manager). +// +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -force-vector-width=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-InterLeave +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-NoInterLeave +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -force-vector-width=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-InterLeave +// O2-InterLeave-COUNT-2: store <2 x double> +// O2-InterLeave: = !{!"llvm.loop.isvectorized", i32 1} +// O2-NoInterLeave-COUNT-1: store <2 x double> +// O2-NoInterLeave-NOT: store <2 x double> +// O2-NoInterLeave: = !{!"llvm.loop.isvectorized", 
i32 1} +// O0-InterLeave-NOT: = !{!"llvm.loop.isvectorized", i32 1} + +void foo(double *a) { + for (int i = 0; i < 1000; i++) + a[i] = 10; +} diff --git a/clang/test/CodeGen/thinlto-slp-vectorize-pm.c b/clang/test/CodeGen/thinlto-slp-vectorize-pm.c --- a/clang/test/CodeGen/thinlto-slp-vectorize-pm.c +++ b/clang/test/CodeGen/thinlto-slp-vectorize-pm.c @@ -1,50 +1,27 @@ // REQUIRES: x86-registered-target -// RUN: %clang_cc1 -o %t.o -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s +// RUN: %clang_cc1 -o %t.o -O2 -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s // RUN: llvm-lto -thinlto -o %t %t.o // Test to ensure the slp vectorize codegen option is passed down to the // ThinLTO backend. -vectorize-slp is a cc1 option and will be added -// automatically when O2/O3/Os/Oz is available for clang. Once -vectorize-slp -// is enabled, "-mllvm -vectorize-slp=false" won't disable slp vectorization -// currently. "-mllvm -vectorize-slp=false" is added here in the test to -// ensure the slp vectorization is executed because the -vectorize-slp cc1 -// flag is passed down, not because "-mllvm -vectorize-slp" is enabled -// by default. +// automatically when O2/O3/Os/Oz is available for clang. Also check that +// "-mllvm -vectorize-slp=false" will disable slp vectorization, overriding +// the cc1 option. 
// -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-slp -mllvm -vectorize-slp=false -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-SLP -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-slp -mllvm -vectorize-slp=false -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-SLP -// O2-SLP: Running pass: SLPVectorizerPass -// O0-SLP-NOT: Running pass: SLPVectorizerPass - -// Test to ensure the loop vectorize codegen option is passed down to the -// ThinLTO backend. -vectorize-loops is a cc1 option and will be added -// automatically when O2/O3/Os is available for clang. Once -vectorize-loops is -// enabled, "-mllvm -vectorize-loops=false" won't disable loop vectorization -// currently. "-mllvm -vectorize-loops=false" is added here in the test to -// ensure the loop vectorization is executed because the -vectorize-loops cc1 -// flag is passed down, not because "-mllvm -vectorize-loops" is enabled -// by default. -// -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-LPV -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-LPV -// O2-LPV: = !{!"llvm.loop.isvectorized", i32 1} -// O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} - -// Test to ensure the loop interleave codegen option is passed down to the -// ThinLTO backend. 
The internal loop interleave codegen option will be -// enabled automatically when O2/O3 is available for clang. Once the loop -// interleave option is enabled, "-mllvm -interleave-loops=false" won't disable -// the interleave. currently. "-mllvm -interleave-loops=false" is added here -// in the test to ensure the loop interleave is executed because the interleave -// codegen flag is passed down, not because "-mllvm -interleave-loops" is -// enabled by default. +// Check both the new and old PMs. // -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=1 -mllvm -force-vector-interleave=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-InterLeave -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=1 -mllvm -force-vector-interleave=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-InterLeave -// O2-InterLeave: = !{!"llvm.loop.isvectorized", i32 1} -// O0-InterLeave-NOT: = !{!"llvm.loop.isvectorized", i32 1} +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -vectorize-slp -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=SLP +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -vectorize-slp -mllvm -vectorize-slp=false -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=NOSLP +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -vectorize-slp -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=SLP +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -vectorize-slp -mllvm -vectorize-slp=false -o - -x ir %t.o 
-fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s --check-prefix=NOSLP +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -vectorize-slp -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=NOSLP +// SLP: extractelement +// NOSLP-NOT: extractelement -void foo(double *a) { - for (int i = 0; i < 1000; i++) - a[i] = 10; +int foo(double *A, int n, int m) { + double sum = 0, v1 = 2, v0 = 3; + for (int i=0; i < n; ++i) + sum += 7*A[i*2] + 7*A[i*2+1]; + return sum; } + diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -73,16 +73,15 @@ /// can be set in the PassBuilder when using a LLVM as a library. PipelineTuningOptions(); - /// Tuning option to set loop interleaving on/off. Its default value is that - /// of the flag: `-interleave-loops`. + /// Tuning option to set loop interleaving on/off, set based on opt level. bool LoopInterleaving; - /// Tuning option to enable/disable loop vectorization. Its default value is - /// that of the flag: `-vectorize-loops`. + /// Tuning option to enable/disable loop vectorization, set based on opt + /// level. bool LoopVectorization; - /// Tuning option to enable/disable slp loop vectorization. Its default value - /// is that of the flag: `vectorize-slp`. + /// Tuning option to enable/disable SLP vectorization, set based on opt + /// level. bool SLPVectorization; /// Tuning option to enable/disable loop unrolling. Its default value is true. diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h @@ -118,6 +118,7 @@ /// The LoopVectorize Pass. 
struct LoopVectorizePass : public PassInfoMixin { +private: /// If false, consider all loops for interleaving. /// If true, only loops that explicitly request interleaving are considered. bool InterleaveOnlyWhenForced; @@ -126,9 +127,8 @@ /// If true, only loops that explicitly request vectorization are considered. bool VectorizeOnlyWhenForced; - LoopVectorizePass(LoopVectorizeOptions Opts = {}) - : InterleaveOnlyWhenForced(Opts.InterleaveOnlyWhenForced), - VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced) {} +public: + LoopVectorizePass(LoopVectorizeOptions Opts = {}); ScalarEvolution *SE; LoopInfo *LI; diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h --- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -55,8 +55,6 @@ } // end namespace slpvectorizer -extern cl::opt RunSLPVectorization; - struct SLPVectorizerPass : public PassInfoMixin { using StoreList = SmallVector; using StoreListMap = MapVector; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -242,9 +242,9 @@ cl::desc("Enable call graph profile pass for the new PM (default = on)")); PipelineTuningOptions::PipelineTuningOptions() { - LoopInterleaving = EnableLoopInterleaving; - LoopVectorization = EnableLoopVectorization; - SLPVectorization = RunSLPVectorization; + LoopInterleaving = true; + LoopVectorization = true; + SLPVectorization = false; LoopUnrolling = true; ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; Coroutines = false; diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -159,9 +159,9 @@ LibraryInfo = nullptr; Inliner = nullptr; DisableUnrollLoops = false; - SLPVectorize = 
RunSLPVectorization; - LoopVectorize = EnableLoopVectorization; - LoopsInterleaved = EnableLoopInterleaving; + SLPVectorize = false; + LoopVectorize = true; + LoopsInterleaved = true; RerollLoops = RunLoopRerolling; NewGVN = RunNewGVN; LicmMssaOptCap = SetLicmMssaOptCap; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1590,9 +1590,8 @@ explicit LoopVectorize(bool InterleaveOnlyWhenForced = false, bool VectorizeOnlyWhenForced = false) - : FunctionPass(ID) { - Impl.InterleaveOnlyWhenForced = InterleaveOnlyWhenForced; - Impl.VectorizeOnlyWhenForced = VectorizeOnlyWhenForced; + : FunctionPass(ID), + Impl({InterleaveOnlyWhenForced, VectorizeOnlyWhenForced}) { initializeLoopVectorizePass(*PassRegistry::getPassRegistry()); } @@ -7615,6 +7614,12 @@ return true; } +LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts) + : InterleaveOnlyWhenForced(Opts.InterleaveOnlyWhenForced | + !EnableLoopInterleaving), + VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced | + !EnableLoopVectorization) {} + bool LoopVectorizePass::processLoop(Loop *L) { assert((EnableVPlanNativePath || L->empty()) && "VPlan-native path is not enabled. 
Only process inner loops."); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -108,9 +108,8 @@ STATISTIC(NumVectorInstructions, "Number of vector instructions generated"); -cl::opt - llvm::RunSLPVectorization("vectorize-slp", cl::init(false), cl::Hidden, - cl::desc("Run the SLP vectorization passes")); +cl::opt RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden, + cl::desc("Run the SLP vectorization passes")); static cl::opt SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, @@ -5645,6 +5644,8 @@ LoopInfo *LI_, DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_, OptimizationRemarkEmitter *ORE_) { + if (!RunSLPVectorization) + return false; SE = SE_; TTI = TTI_; TLI = TLI_; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/opt.ll b/llvm/test/Transforms/SLPVectorizer/X86/opt.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/opt.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/opt.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -O3 -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=SLP -; RUN: opt < %s -O3 -disable-slp-vectorization -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSLP +; RUN: opt < %s -O3 -vectorize-slp=false -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSLP target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -180,11 +180,6 @@ cl::desc("Disable loop unrolling in all relevant passes"), cl::init(false)); 
-static cl::opt -DisableSLPVectorization("disable-slp-vectorization", - cl::desc("Disable the slp vectorization pass"), - cl::init(false)); - static cl::opt EmitSummaryIndex("module-summary", cl::desc("Emit module summary index"), cl::init(false)); @@ -406,18 +401,9 @@ Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0) ? DisableLoopUnrolling : OptLevel == 0; - // Check if vectorization is explicitly disabled via -vectorize-loops=false. - // The flag enables vectorization in the LoopVectorize pass, it is on by - // default, and if it was disabled, leave it disabled here. - // Another flag that exists: -loop-vectorize, controls adding the pass to the - // pass manager. If set, the pass is added, and there is no additional check - // here for it. - if (Builder.LoopVectorize) - Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2; - - // When #pragma vectorize is on for SLP, do the same as above - Builder.SLPVectorize = - DisableSLPVectorization ? false : OptLevel > 1 && SizeLevel < 2; + Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2; + + Builder.SLPVectorize = OptLevel > 1 && SizeLevel < 2; if (TM) TM->adjustPassManager(Builder);