Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -4952,7 +4952,6 @@ defm repack_arrays : BooleanFFlag<"repack-arrays">, Group; defm second_underscore : BooleanFFlag<"second-underscore">, Group; defm sign_zero : BooleanFFlag<"sign-zero">, Group; -defm stack_arrays : BooleanFFlag<"stack-arrays">, Group; defm underscoring : BooleanFFlag<"underscoring">, Group; defm whole_file : BooleanFFlag<"whole-file">, Group; @@ -5032,6 +5031,11 @@ def fno_automatic : Flag<["-"], "fno-automatic">, Group, HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">; +def fstack_arrays : Flag<["-"], "fstack-arrays">, Group, + HelpText<"Attempt to allocate array temporaries on the stack, no matter their size">; +def fno_stack_arrays : Flag<["-"], "fno-stack-arrays">, Group, + HelpText<"Allocate array temporaries on the heap (default)">; + } // let Flags = [FC1Option, FlangOption, FlangOnlyOption] def J : JoinedOrSeparate<["-"], "J">, Index: clang/lib/Driver/ToolChains/Flang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Flang.cpp +++ clang/lib/Driver/ToolChains/Flang.cpp @@ -57,6 +57,13 @@ options::OPT_fintrinsic_modules_path, options::OPT_pedantic, options::OPT_std_EQ, options::OPT_W_Joined, options::OPT_fconvert_EQ, options::OPT_fpass_plugin_EQ}); + + Arg *stackArrays = + Args.getLastArg(options::OPT_Ofast, options::OPT_fstack_arrays, + options::OPT_fno_stack_arrays); + if (stackArrays && + !stackArrays->getOption().matches(options::OPT_fno_stack_arrays)) + CmdArgs.push_back("-fstack-arrays"); } void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { Index: flang/docs/FlangDriver.md =================================================================== --- flang/docs/FlangDriver.md +++ flang/docs/FlangDriver.md @@ 
-546,8 +546,7 @@ documentation for more details. ## Ofast and Fast Math -`-Ofast` in Flang means `-O3 -ffast-math`. `-fstack-arrays` will be added to -`-Ofast` in the future (https://github.com/llvm/llvm-project/issues/59231). +`-Ofast` in Flang means `-O3 -ffast-math -fstack-arrays`. `-ffast-math` means the following: - `-fno-honor-infinities` @@ -570,8 +569,7 @@ ### Comparison with GCC/GFortran GCC/GFortran translate `-Ofast` to -`-O3 -ffast-math -fstack-arrays -fno-semantic-interposition`. `-fstack-arrays` -is TODO for Flang. +`-O3 -ffast-math -fstack-arrays -fno-semantic-interposition`. `-fno-semantic-interposition` is not used because clang does not enable this as part of `-Ofast` as the default behaviour is similar. Index: flang/include/flang/Frontend/CodeGenOptions.def =================================================================== --- flang/include/flang/Frontend/CodeGenOptions.def +++ flang/include/flang/Frontend/CodeGenOptions.def @@ -26,6 +26,7 @@ CODEGENOPT(PICLevel, 2, 0) ///< PIC level of the LLVM module. CODEGENOPT(IsPIE, 1, 0) ///< PIE level is the same as PIC Level. +CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass) ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use. 
Index: flang/include/flang/Tools/CLOptions.inc =================================================================== --- flang/include/flang/Tools/CLOptions.inc +++ flang/include/flang/Tools/CLOptions.inc @@ -156,7 +156,7 @@ /// /// \param pm - MLIR pass manager that will hold the pipeline definition inline void createDefaultFIROptimizerPassPipeline( - mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) { + mlir::PassManager &pm, bool stackArrays = false, llvm::OptimizationLevel optLevel = defaultOptLevel) { // simplify the IR mlir::GreedyRewriteConfig config; config.enableRegionSimplification = false; @@ -171,7 +171,11 @@ pm.addPass(fir::createAlgebraicSimplificationPass(config)); } pm.addPass(mlir::createCSEPass()); - fir::addMemoryAllocationOpt(pm); + + if (stackArrays) + pm.addNestedPass(fir::createStackArraysPass()); + else + fir::addMemoryAllocationOpt(pm); // The default inliner pass adds the canonicalizer pass with the default // configuration. Create the inliner pass with tco config. @@ -209,9 +213,9 @@ /// \param optLevel - optimization level used for creating FIR optimization /// passes pipeline inline void createMLIRToLLVMPassPipeline( - mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) { + mlir::PassManager &pm, bool stackArrays = false, llvm::OptimizationLevel optLevel = defaultOptLevel) { // Add default optimizer pass pipeline. - fir::createDefaultFIROptimizerPassPipeline(pm, optLevel); + fir::createDefaultFIROptimizerPassPipeline(pm, stackArrays, optLevel); // Add codegen pass pipeline. 
fir::createDefaultFIRCodeGenPassPipeline(pm); Index: flang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- flang/lib/Frontend/CompilerInvocation.cpp +++ flang/lib/Frontend/CompilerInvocation.cpp @@ -125,6 +125,11 @@ clang::driver::options::OPT_fno_debug_pass_manager, false)) opts.DebugPassManager = 1; + if (args.hasFlag(clang::driver::options::OPT_fstack_arrays, + clang::driver::options::OPT_fno_stack_arrays, false)) { + opts.StackArrays = 1; + } + for (auto *a : args.filtered(clang::driver::options::OPT_fpass_plugin_EQ)) opts.LLVMPassPlugins.push_back(a->getValue()); Index: flang/lib/Frontend/FrontendActions.cpp =================================================================== --- flang/lib/Frontend/FrontendActions.cpp +++ flang/lib/Frontend/FrontendActions.cpp @@ -541,7 +541,7 @@ pm.enableVerifier(/*verifyPasses=*/true); // Create the pass pipeline - fir::createMLIRToLLVMPassPipeline(pm, level); + fir::createMLIRToLLVMPassPipeline(pm, opts.StackArrays, level); mlir::applyPassManagerCLOptions(pm); // run the pass manager Index: flang/test/Driver/driver-help-hidden.f90 =================================================================== --- flang/test/Driver/driver-help-hidden.f90 +++ flang/test/Driver/driver-help-hidden.f90 @@ -47,10 +47,12 @@ ! CHECK-NEXT: -fno-color-diagnostics Disable colors in diagnostics ! CHECK-NEXT: -fno-integrated-as Disable the integrated assembler ! CHECK-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros +! CHECK-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default) ! CHECK-NEXT: -fopenacc Enable OpenACC ! CHECK-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! CHECK-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager). ! CHECK-NEXT: -freciprocal-math Allow division operations to be reassociated +! 
CHECK-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size ! CHECK-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! CHECK-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! CHECK-NEXT: -help Display available options Index: flang/test/Driver/driver-help.f90 =================================================================== --- flang/test/Driver/driver-help.f90 +++ flang/test/Driver/driver-help.f90 @@ -45,10 +45,12 @@ ! HELP-NEXT: -fno-color-diagnostics Disable colors in diagnostics ! HELP-NEXT: -fno-integrated-as Disable the integrated assembler ! HELP-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros +! HELP-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default) ! HELP-NEXT: -fopenacc Enable OpenACC ! HELP-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! HELP-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager). ! HELP-NEXT: -freciprocal-math Allow division operations to be reassociated +! HELP-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size ! HELP-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! HELP-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! HELP-NEXT: -help Display available options @@ -130,10 +132,12 @@ ! HELP-FC1-NEXT: -fno-debug-pass-manager Disables debug printing for the new pass manager ! HELP-FC1-NEXT: -fno-reformat Dump the cooked character stream in -E mode ! HELP-FC1-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros +! HELP-FC1-NEXT: -fno-stack-arrays Allocate array temporaries on the heap (default) ! HELP-FC1-NEXT: -fopenacc Enable OpenACC ! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! 
HELP-FC1-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager). ! HELP-FC1-NEXT: -freciprocal-math Allow division operations to be reassociated +! HELP-FC1-NEXT: -fstack-arrays Attempt to allocate array temporaries on the stack, no matter their size ! HELP-FC1-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! HELP-FC1-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! HELP-FC1-NEXT: -help Display available options Index: flang/test/Driver/fast_math.f90 =================================================================== --- flang/test/Driver/fast_math.f90 +++ flang/test/Driver/fast_math.f90 @@ -1,25 +1,35 @@ ! Test for correct forwarding of fast-math flags from the compiler driver to the ! frontend driver -! -Ofast => -ffast-math -O3 +! -Ofast => -ffast-math -O3 -fstack-arrays ! RUN: %flang -Ofast -fsyntax-only -### %s -o %t 2>&1 \ ! RUN: | FileCheck --check-prefix=CHECK-OFAST %s ! CHECK-OFAST: -fc1 ! CHECK-OFAST-SAME: -ffast-math +! CHECK-OFAST-SAME: -fstack-arrays ! CHECK-OFAST-SAME: -O3 -! TODO: update once -fstack-arays is added -! RUN: %flang -fstack-arrays -fsyntax-only %s -o %t 2>&1 \ +! RUN: %flang -fstack-arrays -fsyntax-only -### %s -o %t 2>&1 \ ! RUN: | FileCheck --check-prefix=CHECK-STACK-ARRAYS %s -! CHECK-STACK-ARRAYS: warning: argument unused during compilation: '-fstack-arrays' +! CHECK-STACK-ARRAYS: -fc1 +! CHECK-STACK-ARRAYS-SAME: -fstack-arrays -! -Ofast -fno-fast-math => -O3 +! -Ofast -fno-fast-math => -O3 -fstack-arrays ! RUN: %flang -Ofast -fno-fast-math -fsyntax-only -### %s -o %t 2>&1 \ ! RUN: | FileCheck --check-prefix=CHECK-OFAST-NO-FAST %s ! CHECK-OFAST-NO-FAST: -fc1 ! CHECK-OFAST-NO-FAST-NOT: -ffast-math +! CHECK-OFAST-NO-FAST-SAME: -fstack-arrays ! CHECK-OFAST-NO-FAST-SAME: -O3 +! -Ofast -fno-stack-arrays => -O3 -ffast-math +! RUN: %flang -Ofast -fno-stack-arrays -fsyntax-only -### %s -o %t 2>&1 \ +! 
RUN: | FileCheck --check-prefix=CHECK-OFAST-NO-SA %s +! CHECK-OFAST-NO-SA: -fc1 +! CHECK-OFAST-NO-SA-SAME: -ffast-math +! CHECK-OFAST-NO-SA-NOT: -fstack-arrays +! CHECK-OFAST-NO-SA-SAME: -O3 + ! -ffast-math => -ffast-math ! RUN: %flang -ffast-math -fsyntax-only -### %s -o %t 2>&1 \ ! RUN: | FileCheck --check-prefix=CHECK-FFAST %s Index: flang/test/Transforms/stack-arrays.f90 =================================================================== --- flang/test/Transforms/stack-arrays.f90 +++ flang/test/Transforms/stack-arrays.f90 @@ -1,5 +1,10 @@ ! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --array-value-copy | fir-opt --stack-arrays | FileCheck %s +! We have to check the LLVM IR here to force flang to run the whole MLIR pipeline. +! This is only to check that -fstack-arrays enables the stack-arrays pass, so +! only the first example is checked. +! RUN: %flang_fc1 -emit-llvm -o - -fstack-arrays %s | FileCheck --check-prefix=CHECK-LLVM %s + ! check simple array value copy case subroutine array_value_copy_simple(arr) integer, intent(inout) :: arr(4) @@ -14,6 +19,15 @@ ! CHECK: return ! CHECK-NEXT: } +! CHECK-LLVM: array_value_copy_simple +! CHECK-LLVM-NOT: malloc +! CHECK-LLVM-NOT: free +! CHECK-LLVM: alloca [4 x i32] +! CHECK-LLVM-NOT: malloc +! CHECK-LLVM-NOT: free +! CHECK-LLVM: ret void +! CHECK-LLVM-NEXT: } + ! check complex array value copy case module stuff type DerivedWithAllocatable Index: flang/tools/bbc/bbc.cpp =================================================================== --- flang/tools/bbc/bbc.cpp +++ flang/tools/bbc/bbc.cpp @@ -273,7 +273,8 @@ pm.addPass(std::make_unique()); // Add O2 optimizer pass pipeline. 
- fir::createDefaultFIROptimizerPassPipeline(pm, llvm::OptimizationLevel::O2); + fir::createDefaultFIROptimizerPassPipeline(pm, false, + llvm::OptimizationLevel::O2); } if (mlir::succeeded(pm.run(mlirModule))) { Index: flang/tools/tco/tco.cpp =================================================================== --- flang/tools/tco/tco.cpp +++ flang/tools/tco/tco.cpp @@ -122,7 +122,7 @@ fir::createDefaultFIRCodeGenPassPipeline(pm); } else { // Run tco with O2 by default. - fir::createMLIRToLLVMPassPipeline(pm, llvm::OptimizationLevel::O2); + fir::createMLIRToLLVMPassPipeline(pm, false, llvm::OptimizationLevel::O2); } fir::addLLVMDialectToLLVMPass(pm, out.os()); }