diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5148,7 +5148,9 @@ defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays", PosFlag, NegFlag>; - +defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stride", + PosFlag, + NegFlag>; } // let Flags = [FC1Option, FlangOption, FlangOnlyOption] def J : JoinedOrSeparate<["-"], "J">, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -51,6 +51,32 @@ options::OPT_I, options::OPT_cpp, options::OPT_nocpp}); } +static bool shouldLoopVersion(const ArgList &Args) { + if (Arg *A = Args.getLastArg(options::OPT_Ofast, options::OPT_O, + options::OPT_O4, options::OPT_floop_versioning, + options::OPT_fno_loop_versioning)) { + if (A->getOption().matches(options::OPT_fno_loop_versioning)) + return false; + + if (A->getOption().matches(options::OPT_floop_versioning)) + return true; + + if (A->getOption().matches(options::OPT_Ofast) || + A->getOption().matches(options::OPT_O4)) + return true; + + if (A->getOption().matches(options::OPT_O)) { + StringRef S(A->getValue()); + unsigned OptLevel = 0; + if (S.getAsInteger(10, OptLevel)) + return false; + + return OptLevel > 2; + } + } + return false; +} + void Flang::addOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const { Args.AddAllArgs(CmdArgs, {options::OPT_module_dir, options::OPT_fdebug_module_writer, @@ -65,6 +91,8 @@ if (stackArrays && !stackArrays->getOption().matches(options::OPT_fno_stack_arrays)) CmdArgs.push_back("-fstack-arrays"); + if (shouldLoopVersion(Args)) + CmdArgs.push_back("-fversion-loops-for-stride"); } void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { diff --git a/flang/include/flang/Frontend/CodeGenOptions.def 
b/flang/include/flang/Frontend/CodeGenOptions.def --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -24,6 +24,7 @@ CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new ///< pass manager. +CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning (-fversion-loops-for-stride). CODEGENOPT(PICLevel, 2, 0) ///< PIC level of the LLVM module. CODEGENOPT(IsPIE, 1, 0) ///< PIE level is the same as PIC Level. CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass) diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -72,7 +72,8 @@ "rewrite boxed procedures"); #endif -DisableOption(ExternalNameConversion, "external-name-interop", "convert names with external convention"); +DisableOption(ExternalNameConversion, "external-name-interop", + "convert names with external convention"); /// Generic for adding a pass to the pass manager if it is not disabled. 
template @@ -170,7 +171,7 @@ /// \param pm - MLIR pass manager that will hold the pipeline definition inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel, - bool stackArrays = false) { + bool stackArrays = false, bool loopVersioning = false) { // simplify the IR mlir::GreedyRewriteConfig config; config.enableRegionSimplification = false; @@ -218,8 +219,7 @@ fir::addBoxedProcedurePass(pm); pm.addNestedPass( fir::createAbstractResultOnFuncOptPass()); - pm.addNestedPass( - fir::createAbstractResultOnGlobalOptPass()); + pm.addNestedPass(fir::createAbstractResultOnGlobalOptPass()); fir::addCodeGenRewritePass(pm); fir::addTargetRewritePass(pm); fir::addExternalNameConversionPass(pm, underscoring); @@ -233,9 +233,10 @@ /// passes pipeline inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel, - bool stackArrays = false, bool underscoring = true) { + bool stackArrays = false, bool underscoring = true, bool loopVersioning = false) { // Add default optimizer pass pipeline. - fir::createDefaultFIROptimizerPassPipeline(pm, optLevel, stackArrays); + fir::createDefaultFIROptimizerPassPipeline( + pm, optLevel, stackArrays, loopVersioning); // Add codegen pass pipeline. 
fir::createDefaultFIRCodeGenPassPipeline(pm, optLevel, underscoring); diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -130,6 +130,10 @@ clang::driver::options::OPT_fno_stack_arrays, false)) { opts.StackArrays = 1; } + if (args.hasFlag(clang::driver::options::OPT_floop_versioning, + clang::driver::options::OPT_fno_loop_versioning, false)) { + opts.LoopVersioning = 1; + } for (auto *a : args.filtered(clang::driver::options::OPT_fpass_plugin_EQ)) opts.LLVMPassPlugins.push_back(a->getValue()); diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -553,7 +553,7 @@ // Create the pass pipeline fir::createMLIRToLLVMPassPipeline(pm, level, opts.StackArrays, - opts.Underscoring); + opts.Underscoring, opts.LoopVersioning); mlir::applyPassManagerCLOptions(pm); // run the pass manager diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90 --- a/flang/test/Driver/driver-help-hidden.f90 +++ b/flang/test/Driver/driver-help-hidden.f90 @@ -48,6 +48,8 @@ ! CHECK-NEXT: -fno-integrated-as Disable the integrated assembler ! CHECK-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros ! CHECK-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default) +! CHECK-NEXT: -fno-version-loops-for-stride +! CHECK-NEXT: Do not create unit-strided loops (default) ! CHECK-NEXT: -fopenacc Enable OpenACC ! CHECK-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! CHECK-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager). @@ -55,6 +57,8 @@ ! CHECK-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size ! 
CHECK-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! CHECK-NEXT: -funderscoring Appends one trailing underscore to external names +! CHECK-NEXT: -fversion-loops-for-stride +! CHECK-NEXT: Create unit-strided versions of loops ! CHECK-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! CHECK-NEXT: -help Display available options ! CHECK-NEXT: -I Add directory to the end of the list of include search paths diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -46,6 +46,8 @@ ! HELP-NEXT: -fno-integrated-as Disable the integrated assembler ! HELP-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros ! HELP-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default) +! HELP-NEXT: -fno-version-loops-for-stride +! HELP-NEXT: Do not create unit-strided loops (default) ! HELP-NEXT: -fopenacc Enable OpenACC ! HELP-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! HELP-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager). @@ -53,6 +55,8 @@ ! HELP-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size ! HELP-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! HELP-NEXT: -funderscoring Appends one trailing underscore to external names +! HELP-NEXT: -fversion-loops-for-stride +! HELP-NEXT: Create unit-strided versions of loops ! HELP-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! HELP-NEXT: -help Display available options ! HELP-NEXT: -I Add directory to the end of the list of include search paths @@ -137,6 +141,8 @@ ! HELP-FC1-NEXT: -fno-reformat Dump the cooked character stream in -E mode ! HELP-FC1-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros ! 
HELP-FC1-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default) +! HELP-FC1-NEXT: -fno-version-loops-for-stride +! HELP-FC1-NEXT: Do not create unit-strided loops (default) ! HELP-FC1-NEXT: -fopenacc Enable OpenACC ! HELP-FC1-NEXT: -fopenmp-is-device Generate code only for an OpenMP target device. ! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. @@ -145,6 +151,8 @@ ! HELP-FC1-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size ! HELP-FC1-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! HELP-FC1-NEXT: -funderscoring Appends one trailing underscore to external names +! HELP-FC1-NEXT: -fversion-loops-for-stride +! HELP-FC1-NEXT: Create unit-strided versions of loops ! HELP-FC1-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! HELP-FC1-NEXT: -help Display available options ! HELP-FC1-NEXT: -init-only Only execute frontend initialization diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90 --- a/flang/test/Driver/frontend-forwarding.f90 +++ b/flang/test/Driver/frontend-forwarding.f90 @@ -15,6 +15,7 @@ ! RUN: -fassociative-math \ ! RUN: -freciprocal-math \ ! RUN: -fpass-plugin=Bye%pluginext \ +! RUN: -fversion-loops-for-stride \ ! RUN: -mllvm -print-before-all\ ! RUN: -P \ ! RUN: | FileCheck %s @@ -33,4 +34,5 @@ ! CHECK: "-freciprocal-math" ! CHECK: "-fconvert=little-endian" ! CHECK: "-fpass-plugin=Bye +! CHECK: "-fversion-loops-for-stride" ! CHECK: "-mllvm" "-print-before-all" diff --git a/flang/test/Driver/version-loops.f90 b/flang/test/Driver/version-loops.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Driver/version-loops.f90 @@ -0,0 +1,54 @@ +! Test that flang-new forwards the -f{no-,}version-loops-for-stride +! options correctly to flang-new -fc1 for different variants of optimisation +! and explicit flags. + +! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \ +! RUN: -O3 \ +! 
RUN: | FileCheck %s + +! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \ +! RUN: -O2 \ +! RUN: | FileCheck %s --check-prefix=CHECK-O2 + +! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \ +! RUN: -O2 -fversion-loops-for-stride \ +! RUN: | FileCheck %s --check-prefix=CHECK-O2-with + +! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \ +! RUN: -O4 \ +! RUN: | FileCheck %s --check-prefix=CHECK-O4 + +! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \ +! RUN: -Ofast \ +! RUN: | FileCheck %s --check-prefix=CHECK-Ofast + +! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \ +! RUN: -Ofast -fno-version-loops-for-stride \ +! RUN: | FileCheck %s --check-prefix=CHECK-Ofast-no + +! RUN: %flang -fsyntax-only -### %s -o %t 2>&1 \ +! RUN: -O3 -fno-version-loops-for-stride \ +! RUN: | FileCheck %s --check-prefix=CHECK-O3-no + +! CHECK: "-fversion-loops-for-stride" +! CHECK: "-O3" + +! CHECK-O2-NOT: "-fversion-loops-for-stride" +! CHECK-O2: "-O2" + +! CHECK-O2-with: "-fversion-loops-for-stride" +! CHECK-O2-with: "-O2" + +! CHECK-O4: "-fversion-loops-for-stride" +! CHECK-O4: "-O3" + +! CHECK-Ofast: "-ffast-math" +! CHECK-Ofast: "-fversion-loops-for-stride" +! CHECK-Ofast: "-O3" + +! CHECK-Ofast-no: "-ffast-math" +! CHECK-Ofast-no-NOT: "-fversion-loops-for-stride" +! CHECK-Ofast-no: "-O3" + +! CHECK-O3-no-NOT: "-fversion-loops-for-stride" +! CHECK-O3-no: "-O3"