diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -482,6 +482,40 @@ Group; } +// Works like BoolOption except without marshalling +multiclass BoolOptionWithoutMarshalling> { + defvar flag1 = FlagDefExpanded.Result, prefix, + NAME, spelling_base>; + + defvar flag2 = FlagDefExpanded.Result, prefix, + NAME, spelling_base>; + + // The flags must have different polarity, different values, and only + // one can be implied. + assert !xor(flag1.Polarity, flag2.Polarity), + "the flags must have different polarity: flag1: " # + flag1.Polarity # ", flag2: " # flag2.Polarity; + assert !ne(flag1.Value, flag2.Value), + "the flags must have different values: flag1: " # + flag1.Value # ", flag2: " # flag2.Value; + assert !not(!and(flag1.CanBeImplied, flag2.CanBeImplied)), + "only one of the flags can be implied: flag1: " # + flag1.CanBeImplied # ", flag2: " # flag2.CanBeImplied; + + defvar implied = !if(flag1.CanBeImplied, flag1, flag2); + + def flag1.RecordName : Flag<["-"], flag1.Spelling>, Flags, + HelpText, + ImpliedByAnyOf + {} + def flag2.RecordName : Flag<["-"], flag2.Spelling>, Flags, + HelpText, + ImpliedByAnyOf + {} +} + // FIXME: Diagnose if target does not support protected visibility. 
class MarshallingInfoVisibility : MarshallingInfoEnum, @@ -4986,7 +5020,6 @@ defm repack_arrays : BooleanFFlag<"repack-arrays">, Group; defm second_underscore : BooleanFFlag<"second-underscore">, Group; defm sign_zero : BooleanFFlag<"sign-zero">, Group; -defm stack_arrays : BooleanFFlag<"stack-arrays">, Group; defm underscoring : BooleanFFlag<"underscoring">, Group; defm whole_file : BooleanFFlag<"whole-file">, Group; @@ -5066,6 +5099,10 @@ def fno_automatic : Flag<["-"], "fno-automatic">, Group, HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">; +defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays", + PosFlag, + NegFlag>; + } // let Flags = [FC1Option, FlangOption, FlangOnlyOption] def J : JoinedOrSeparate<["-"], "J">, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -57,6 +57,13 @@ options::OPT_fintrinsic_modules_path, options::OPT_pedantic, options::OPT_std_EQ, options::OPT_W_Joined, options::OPT_fconvert_EQ, options::OPT_fpass_plugin_EQ}); + + Arg *stackArrays = + Args.getLastArg(options::OPT_Ofast, options::OPT_fstack_arrays, + options::OPT_fno_stack_arrays); + if (stackArrays && + !stackArrays->getOption().matches(options::OPT_fno_stack_arrays)) + CmdArgs.push_back("-fstack-arrays"); } void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { diff --git a/flang/docs/FlangDriver.md b/flang/docs/FlangDriver.md --- a/flang/docs/FlangDriver.md +++ b/flang/docs/FlangDriver.md @@ -546,8 +546,7 @@ documentation for more details. ## Ofast and Fast Math -`-Ofast` in Flang means `-O3 -ffast-math`. `-fstack-arrays` will be added to -`-Ofast` in the future (https://github.com/llvm/llvm-project/issues/59231). +`-Ofast` in Flang means `-O3 -ffast-math -fstack-arrays`. 
`-ffast-math` means the following: - `-fno-honor-infinities` @@ -570,9 +569,8 @@ ### Comparison with GCC/GFortran GCC/GFortran translate `-Ofast` to -`-O3 -ffast-math -fstack-arrays -fno-semantic-interposition`. `-fstack-arrays` -is TODO for Flang. -`-fno-semantic-interposition` is not used because clang does not enable this as +`-O3 -ffast-math -fstack-arrays -fno-semantic-interposition`. +`-fno-semantic-interposition` is not used because Clang does not enable this as part of `-Ofast` as the default behaviour is similar. GCC/GFortran has a wider definition of `-ffast-math`: also including diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -26,6 +26,7 @@ CODEGENOPT(PICLevel, 2, 0) ///< PIC level of the LLVM module. CODEGENOPT(IsPIE, 1, 0) ///< PIE level is the same as PIC Level. +CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass) ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use. diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -166,8 +166,9 @@ /// incremental conversion of FIR. 
/// /// \param pm - MLIR pass manager that will hold the pipeline definition -inline void createDefaultFIROptimizerPassPipeline( - mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) { +inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm, + llvm::OptimizationLevel optLevel = defaultOptLevel, + bool stackArrays = false) { // simplify the IR mlir::GreedyRewriteConfig config; config.enableRegionSimplification = false; @@ -182,7 +183,11 @@ pm.addPass(fir::createAlgebraicSimplificationPass(config)); } pm.addPass(mlir::createCSEPass()); - fir::addMemoryAllocationOpt(pm); + + if (stackArrays) + pm.addPass(fir::createStackArraysPass()); + else + fir::addMemoryAllocationOpt(pm); // The default inliner pass adds the canonicalizer pass with the default // configuration. Create the inliner pass with tco config. @@ -220,10 +225,11 @@ /// \param pm - MLIR pass manager that will hold the pipeline definition /// \param optLevel - optimization level used for creating FIR optimization /// passes pipeline -inline void createMLIRToLLVMPassPipeline( - mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) { +inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm, + llvm::OptimizationLevel optLevel = defaultOptLevel, + bool stackArrays = false) { // Add default optimizer pass pipeline. - fir::createDefaultFIROptimizerPassPipeline(pm, optLevel); + fir::createDefaultFIROptimizerPassPipeline(pm, optLevel, stackArrays); // Add codegen pass pipeline. 
fir::createDefaultFIRCodeGenPassPipeline(pm, optLevel); diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -126,6 +126,11 @@ clang::driver::options::OPT_fno_debug_pass_manager, false)) opts.DebugPassManager = 1; + if (args.hasFlag(clang::driver::options::OPT_fstack_arrays, + clang::driver::options::OPT_fno_stack_arrays, false)) { + opts.StackArrays = 1; + } + for (auto *a : args.filtered(clang::driver::options::OPT_fpass_plugin_EQ)) opts.LLVMPassPlugins.push_back(a->getValue()); diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -545,7 +545,7 @@ pm.enableVerifier(/*verifyPasses=*/true); // Create the pass pipeline - fir::createMLIRToLLVMPassPipeline(pm, level); + fir::createMLIRToLLVMPassPipeline(pm, level, opts.StackArrays); mlir::applyPassManagerCLOptions(pm); // run the pass manager diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90 --- a/flang/test/Driver/driver-help-hidden.f90 +++ b/flang/test/Driver/driver-help-hidden.f90 @@ -47,10 +47,12 @@ ! CHECK-NEXT: -fno-color-diagnostics Disable colors in diagnostics ! CHECK-NEXT: -fno-integrated-as Disable the integrated assembler ! CHECK-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros +! CHECK-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default) ! CHECK-NEXT: -fopenacc Enable OpenACC ! CHECK-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! CHECK-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager). ! CHECK-NEXT: -freciprocal-math Allow division operations to be reassociated +! 
CHECK-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size ! CHECK-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! CHECK-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! CHECK-NEXT: -help Display available options diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -45,10 +45,12 @@ ! HELP-NEXT: -fno-color-diagnostics Disable colors in diagnostics ! HELP-NEXT: -fno-integrated-as Disable the integrated assembler ! HELP-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros +! HELP-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default) ! HELP-NEXT: -fopenacc Enable OpenACC ! HELP-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! HELP-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager). ! HELP-NEXT: -freciprocal-math Allow division operations to be reassociated +! HELP-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size ! HELP-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! HELP-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! HELP-NEXT: -help Display available options @@ -133,10 +135,12 @@ ! HELP-FC1-NEXT: -fno-debug-pass-manager Disables debug printing for the new pass manager ! HELP-FC1-NEXT: -fno-reformat Dump the cooked character stream in -E mode ! HELP-FC1-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros +! HELP-FC1-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default) ! HELP-FC1-NEXT: -fopenacc Enable OpenACC ! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! HELP-FC1-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager). ! 
HELP-FC1-NEXT: -freciprocal-math Allow division operations to be reassociated +! HELP-FC1-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size ! HELP-FC1-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! HELP-FC1-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! HELP-FC1-NEXT: -help Display available options diff --git a/flang/test/Driver/fast_math.f90 b/flang/test/Driver/fast_math.f90 --- a/flang/test/Driver/fast_math.f90 +++ b/flang/test/Driver/fast_math.f90 @@ -1,25 +1,35 @@ ! Test for correct forwarding of fast-math flags from the compiler driver to the ! frontend driver -! -Ofast => -ffast-math -O3 +! -Ofast => -ffast-math -O3 -fstack-arrays ! RUN: %flang -Ofast -fsyntax-only -### %s -o %t 2>&1 \ ! RUN: | FileCheck --check-prefix=CHECK-OFAST %s ! CHECK-OFAST: -fc1 ! CHECK-OFAST-SAME: -ffast-math +! CHECK-OFAST-SAME: -fstack-arrays ! CHECK-OFAST-SAME: -O3 -! TODO: update once -fstack-arays is added -! RUN: %flang -fstack-arrays -fsyntax-only %s -o %t 2>&1 \ +! RUN: %flang -fstack-arrays -fsyntax-only -### %s -o %t 2>&1 \ ! RUN: | FileCheck --check-prefix=CHECK-STACK-ARRAYS %s -! CHECK-STACK-ARRAYS: warning: argument unused during compilation: '-fstack-arrays' +! CHECK-STACK-ARRAYS: -fc1 +! CHECK-STACK-ARRAYS-SAME: -fstack-arrays -! -Ofast -fno-fast-math => -O3 +! -Ofast -fno-fast-math => -O3 -fstack-arrays ! RUN: %flang -Ofast -fno-fast-math -fsyntax-only -### %s -o %t 2>&1 \ ! RUN: | FileCheck --check-prefix=CHECK-OFAST-NO-FAST %s ! CHECK-OFAST-NO-FAST: -fc1 ! CHECK-OFAST-NO-FAST-NOT: -ffast-math +! CHECK-OFAST-NO-FAST-SAME: -fstack-arrays ! CHECK-OFAST-NO-FAST-SAME: -O3 +! -Ofast -fno-stack-arrays -> -O3 -ffast-math +! RUN: %flang -Ofast -fno-stack-arrays -fsyntax-only -### %s -o %t 2>&1 \ +! RUN: | FileCheck --check-prefix=CHECK-OFAST-NO-SA %s +! CHECK-OFAST-NO-SA: -fc1 +! CHECK-OFAST-NO-SA-SAME: -ffast-math +! CHECK-OFAST-NO-SA-NOT: -fstack-arrays +! 
CHECK-OFAST-NO-SA-SAME: -O3 + ! -ffast-math => -ffast-math ! RUN: %flang -ffast-math -fsyntax-only -### %s -o %t 2>&1 \ ! RUN: | FileCheck --check-prefix=CHECK-FFAST %s diff --git a/flang/test/Transforms/stack-arrays.f90 b/flang/test/Transforms/stack-arrays.f90 --- a/flang/test/Transforms/stack-arrays.f90 +++ b/flang/test/Transforms/stack-arrays.f90 @@ -1,5 +1,10 @@ ! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --array-value-copy | fir-opt --stack-arrays | FileCheck %s +! In order to verify the whole MLIR pipeline, make the driver generate LLVM IR. +! This only verifies that -fstack-arrays enables the stack-arrays pass, so +! only the first example is checked. +! RUN: %flang_fc1 -emit-llvm -o - -fstack-arrays %s | FileCheck --check-prefix=LLVM-IR %s + ! check simple array value copy case subroutine array_value_copy_simple(arr) integer, intent(inout) :: arr(4) @@ -14,6 +19,15 @@ ! CHECK: return ! CHECK-NEXT: } +! LLVM-IR: array_value_copy_simple +! LLVM-IR-NOT: malloc +! LLVM-IR-NOT: free +! LLVM-IR: alloca [4 x i32] +! LLVM-IR-NOT: malloc +! LLVM-IR-NOT: free +! LLVM-IR: ret void +! LLVM-IR-NEXT: } + ! check complex array value copy case module stuff type DerivedWithAllocatable