Index: clang/lib/CodeGen/BackendUtil.cpp =================================================================== --- clang/lib/CodeGen/BackendUtil.cpp +++ clang/lib/CodeGen/BackendUtil.cpp @@ -1440,6 +1440,12 @@ Conf.OptLevel = CGOpts.OptimizationLevel; initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); Conf.SampleProfile = std::move(SampleProfile); + Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops; + // For historical reasons, loop interleaving is set to mirror the setting for + // loop unrolling. + Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops; + Conf.PTO.LoopVectorization = CGOpts.VectorizeLoop; + Conf.PTO.SLPVectorization = CGOpts.VectorizeSLP; // Context sensitive profile. if (CGOpts.hasProfileCSIRInstr()) { Index: clang/test/CodeGen/thinlto-slp-vectorize-pm.c =================================================================== --- /dev/null +++ clang/test/CodeGen/thinlto-slp-vectorize-pm.c @@ -0,0 +1,48 @@ +// REQUIRES: x86-registered-target +// RUN: %clang_cc1 -o %t.o -flto=thin -fexperimental-new-pass-manager -triple x86_64-unknown-linux-gnu -emit-llvm-bc %s +// RUN: llvm-lto -thinlto -o %t %t.o + +// Test to ensure the slp vectorize codegen option is passed down to the +// ThinLTO backend. -vectorize-slp is a cc1 option and will be added +// automatically when O2/O3/Os/Oz is available for clang. Once -vectorize-slp +// is enabled, "-mllvm -vectorize-slp=false" won't disable slp vectorization +// currently. "-mllvm -vectorize-slp=false" is added here in the test to +// ensure the slp vectorization is executed because the -vectorize-slp cc1 +// flag is passed down, not because "-mllvm -vectorize-slp" is enabled. 
+// +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-slp -mllvm -vectorize-slp=false -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-SLP +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-slp -mllvm -vectorize-slp=false -o %t2.o -x ir %t.o -fthinlto-index=%t.thinlto.bc -fdebug-pass-manager -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-SLP +// O2-SLP: Running pass: SLPVectorizerPass +// O0-SLP-NOT: Running pass: SLPVectorizerPass + +// Test to ensure the loop vectorize codegen option is passed down to the +// ThinLTO backend. -vectorize-loops is a cc1 option and will be added +// automatically when O2/O3/Os is available for clang. Once -vectorize-loops is +// enabled, "-mllvm -vectorize-loops=false" won't disable loop vectorization +// currently. "-mllvm -vectorize-loops=false" is added here in the test to +// ensure the loop vectorization is executed because the -vectorize-loops cc1 +// flag is passed down, not because "-mllvm -vectorize-loops" is enabled. +// +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-LPV +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -vectorize-loops=false -mllvm -force-vector-width=2 -mllvm -force-vector-interleave=1 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-LPV +// O2-LPV: = !{!"llvm.loop.isvectorized", i32 1} +// O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} + +// Test to ensure the loop interleave codegen option is passed down to the +// ThinLTO backend. 
The internal loop interleave codegen option will be +// enabled automatically when O2/O3 is available for clang. Once the loop +// interleave option is enabled, "-mllvm -interleave-loops=false" won't disable +// the interleave currently. "-mllvm -interleave-loops=false" is added here +// in the test to ensure the loop interleave is executed because the interleave +// codegen flag is passed down, not because "-mllvm -interleave-loops" is +// enabled. +// +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O2 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=1 -mllvm -force-vector-interleave=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O2-InterLeave +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-obj -O0 -vectorize-loops -mllvm -interleave-loops=false -mllvm -force-vector-width=1 -mllvm -force-vector-interleave=2 -emit-llvm -o - -x ir %t.o -fthinlto-index=%t.thinlto.bc -fexperimental-new-pass-manager 2>&1 | FileCheck %s --check-prefix=O0-InterLeave +// O2-InterLeave: = !{!"llvm.loop.isvectorized", i32 1} +// O0-InterLeave-NOT: = !{!"llvm.loop.isvectorized", i32 1} + +void foo(double *a) { + for (int i = 0; i < 1000; i++) + a[i] = 10; +} Index: lld/COFF/CMakeLists.txt =================================================================== --- lld/COFF/CMakeLists.txt +++ lld/COFF/CMakeLists.txt @@ -37,6 +37,7 @@ MC Object Option + Passes Support WindowsManifest Index: lld/ELF/CMakeLists.txt =================================================================== --- lld/ELF/CMakeLists.txt +++ lld/ELF/CMakeLists.txt @@ -57,6 +57,7 @@ MC Object Option + Passes Support LINK_LIBS Index: lld/ELF/LTO.cpp =================================================================== --- lld/ELF/LTO.cpp +++ lld/ELF/LTO.cpp @@ -93,6 +93,9 @@ c.MAttrs = getMAttrs(); c.CGOptLevel = args::getCGOptLevel(config->ltoo); + c.PTO.LoopVectorization = c.OptLevel > 1; + 
c.PTO.SLPVectorization = c.OptLevel > 1; + // Set up a custom pipeline if we've been asked to. c.OptPipeline = config->ltoNewPmPasses; c.AAPipeline = config->ltoAAPipeline; Index: lld/test/ELF/lto/slp-vectorize-pm.ll =================================================================== --- /dev/null +++ lld/test/ELF/lto/slp-vectorize-pm.ll @@ -0,0 +1,48 @@ +; REQUIRES: x86 +; RUN: opt -module-summary %s -o %t.o + +; Test SLP and Loop Vectorization are enabled by default at O2 and O3. +; RUN: ld.lld --plugin-opt=new-pass-manager --plugin-opt=debug-pass-manager --plugin-opt=O0 --plugin-opt=save-temps -shared -o %t1.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O0-SLP +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O0-LPV + +; RUN: ld.lld --plugin-opt=new-pass-manager --plugin-opt=debug-pass-manager --plugin-opt=O1 --plugin-opt=save-temps -shared -o %t2.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O1-SLP +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O1-LPV + +; RUN: ld.lld --plugin-opt=new-pass-manager --plugin-opt=debug-pass-manager --plugin-opt=O2 --plugin-opt=save-temps -shared -o %t3.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O2-SLP +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O2-LPV + +; RUN: ld.lld --plugin-opt=new-pass-manager --plugin-opt=debug-pass-manager --plugin-opt=O3 --plugin-opt=save-temps -shared -o %t4.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O3-SLP +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O3-LPV + +; CHECK-O0-SLP-NOT: Running pass: SLPVectorizerPass +; CHECK-O1-SLP-NOT: Running pass: SLPVectorizerPass +; CHECK-O2-SLP: Running pass: SLPVectorizerPass +; CHECK-O3-SLP: Running pass: SLPVectorizerPass +; CHECK-O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O1-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O2-LPV: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O3-LPV: = !{!"llvm.loop.isvectorized", i32 1} + 
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @foo(i32* %a) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %red.05 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 255 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret i32 %add +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.unroll.disable", i1 true} Index: lld/wasm/CMakeLists.txt =================================================================== --- lld/wasm/CMakeLists.txt +++ lld/wasm/CMakeLists.txt @@ -29,6 +29,7 @@ MC Object Option + Passes Support LINK_LIBS Index: llvm/include/llvm/LTO/Config.h =================================================================== --- llvm/include/llvm/LTO/Config.h +++ llvm/include/llvm/LTO/Config.h @@ -16,6 +16,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetOptions.h" @@ -126,6 +127,9 @@ /// with llvm-lto2. std::unique_ptr ResolutionFile; + /// Tunable parameters for passes in the default pipelines. + PipelineTuningOptions PTO; + /// The following callbacks deal with tasks, which normally represent the /// entire optimization and code generation pipeline for what will become a /// single native object file. 
Each task has a unique identifier between 0 and Index: llvm/lib/LTO/LTOBackend.cpp =================================================================== --- llvm/lib/LTO/LTOBackend.cpp +++ llvm/lib/LTO/LTOBackend.cpp @@ -169,7 +169,7 @@ PassInstrumentationCallbacks PIC; StandardInstrumentations SI; SI.registerCallbacks(PIC); - PassBuilder PB(TM, PipelineTuningOptions(),PGOOpt, &PIC); + PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC); AAManager AA; // Parse a custom AA pipeline if asked to. Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -1902,6 +1902,12 @@ return Error::success(); } + // This is consistent with the old pass manager, but inconsistent with + // clang. Clang doesn't enable loop vectorization but does enable + // SLP vectorization at Oz. + PTO.LoopVectorization = L > O1 && L < Oz; + PTO.SLPVectorization = L > O1 && L < Oz; + if (Matches[1] == "default") { MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging)); } else if (Matches[1] == "thinlto-pre-link") { Index: llvm/test/Other/new-pm-defaults.ll =================================================================== --- llvm/test/Other/new-pm-defaults.ll +++ llvm/test/Other/new-pm-defaults.ll @@ -239,6 +239,9 @@ ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O2-NEXT: Running pass: SLPVectorizerPass +; CHECK-O3-NEXT: Running pass: SLPVectorizerPass +; CHECK-Os-NEXT: Running pass: SLPVectorizerPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopUnrollPass ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass Index: llvm/test/Other/new-pm-thinlto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-defaults.ll +++ llvm/test/Other/new-pm-thinlto-defaults.ll @@ -220,6 +220,9 @@ ; 
CHECK-POSTLINK-O-NEXT: Running analysis: LoopAccessAnalysis ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-POSTLINK-O2-NEXT: Running pass: SLPVectorizerPass +; CHECK-POSTLINK-O3-NEXT: Running pass: SLPVectorizerPass +; CHECK-POSTLINK-Os-NEXT: Running pass: SLPVectorizerPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass ; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass ; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass Index: llvm/test/tools/gold/X86/slp-vectorize-pm.ll =================================================================== --- /dev/null +++ llvm/test/tools/gold/X86/slp-vectorize-pm.ll @@ -0,0 +1,79 @@ +; RUN: opt -module-summary %s -o %t.o + +; Test SLP and Loop Vectorization are enabled by default at O2 and O3. +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=new-pass-manager \ +; RUN: --plugin-opt=debug-pass-manager \ +; RUN: --plugin-opt=cache-dir=%t.cache \ +; RUN: --plugin-opt=O0 \ +; RUN: --plugin-opt=save-temps \ +; RUN: -shared \ +; RUN: -o %t2.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O0-SLP +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O0-LPV + +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=new-pass-manager \ +; RUN: --plugin-opt=debug-pass-manager \ +; RUN: --plugin-opt=cache-dir=%t.cache \ +; RUN: --plugin-opt=O1 \ +; RUN: --plugin-opt=save-temps \ +; RUN: -shared \ +; RUN: -o %t3.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O1-SLP +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O1-LPV + +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=new-pass-manager \ +; RUN: --plugin-opt=debug-pass-manager \ +; RUN: --plugin-opt=cache-dir=%t.cache \ +; RUN: --plugin-opt=O2 \ +; 
RUN: --plugin-opt=save-temps \ +; RUN: -shared \ +; RUN: -o %t4.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O2-SLP +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O2-LPV + +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=new-pass-manager \ +; RUN: --plugin-opt=debug-pass-manager \ +; RUN: --plugin-opt=cache-dir=%t.cache \ +; RUN: --plugin-opt=O3 \ +; RUN: --plugin-opt=save-temps \ +; RUN: -shared \ +; RUN: -o %t5.o %t.o 2>&1 | FileCheck %s --check-prefix=CHECK-O3-SLP +; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O3-LPV + +; CHECK-O0-SLP-NOT: Running pass: SLPVectorizerPass +; CHECK-O1-SLP-NOT: Running pass: SLPVectorizerPass +; CHECK-O2-SLP: Running pass: SLPVectorizerPass +; CHECK-O3-SLP: Running pass: SLPVectorizerPass +; CHECK-O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O1-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O2-LPV: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O3-LPV: = !{!"llvm.loop.isvectorized", i32 1} + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @foo(i32* %a) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %red.05 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 255 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret i32 %add +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.unroll.disable", i1 true} Index: llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll =================================================================== --- /dev/null +++ llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll @@ 
-0,0 +1,51 @@ +; RUN: opt -module-summary %s -o %t1.bc + +; Test SLP and Loop Vectorization are enabled by default at O2 and O3. +; RUN: llvm-lto2 run %t1.bc -o %t2.o -O0 -r %t1.bc,foo,plx -debug-pass-manager \ +; RUN: -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O0-SLP +; RUN: llvm-dis %t2.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O0-LPV + +; RUN: llvm-lto2 run %t1.bc -o %t3.o -O1 -r %t1.bc,foo,plx -debug-pass-manager \ +; RUN: -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O1-SLP +; RUN: llvm-dis %t3.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O1-LPV + +; RUN: llvm-lto2 run %t1.bc -o %t4.o -O2 -r %t1.bc,foo,plx -debug-pass-manager \ +; RUN: -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O2-SLP +; RUN: llvm-dis %t4.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O2-LPV + +; RUN: llvm-lto2 run %t1.bc -o %t5.o -O3 -r %t1.bc,foo,plx -debug-pass-manager \ +; RUN: -use-new-pm -save-temps 2>&1 | FileCheck %s --check-prefix=CHECK-O3-SLP +; RUN: llvm-dis %t5.o.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-O3-LPV + +; CHECK-O0-SLP-NOT: Running pass: SLPVectorizerPass +; CHECK-O1-SLP-NOT: Running pass: SLPVectorizerPass +; CHECK-O2-SLP: Running pass: SLPVectorizerPass +; CHECK-O3-SLP: Running pass: SLPVectorizerPass +; CHECK-O0-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O1-LPV-NOT: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O2-LPV: = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-O3-LPV: = !{!"llvm.loop.isvectorized", i32 1} + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @foo(i32* %a) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %red.05 + 
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 255 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret i32 %add +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.unroll.disable", i1 true} Index: llvm/tools/gold/gold-plugin.cpp =================================================================== --- llvm/tools/gold/gold-plugin.cpp +++ llvm/tools/gold/gold-plugin.cpp @@ -860,6 +860,9 @@ Conf.CGOptLevel = getCGOptLevel(); Conf.DisableVerify = options::DisableVerify; Conf.OptLevel = options::OptLevel; + Conf.PTO.LoopVectorization = options::OptLevel > 1; + Conf.PTO.SLPVectorization = options::OptLevel > 1; + if (options::Parallelism) Backend = createInProcessThinBackend(options::Parallelism); if (options::thinlto_index_only) { Index: llvm/tools/llvm-lto2/CMakeLists.txt =================================================================== --- llvm/tools/llvm-lto2/CMakeLists.txt +++ llvm/tools/llvm-lto2/CMakeLists.txt @@ -9,6 +9,7 @@ LTO MC Object + Passes Support Target ) Index: llvm/tools/llvm-lto2/llvm-lto2.cpp =================================================================== --- llvm/tools/llvm-lto2/llvm-lto2.cpp +++ llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -270,6 +270,8 @@ Conf.OverrideTriple = OverrideTriple; Conf.DefaultTriple = DefaultTriple; Conf.StatsFile = StatsFile; + Conf.PTO.LoopVectorization = Conf.OptLevel > 1; + Conf.PTO.SLPVectorization = Conf.OptLevel > 1; ThinBackend Backend; if (ThinLTODistributedIndexes)