diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h
--- a/flang/include/flang/Frontend/CompilerInvocation.h
+++ b/flang/include/flang/Frontend/CompilerInvocation.h
@@ -15,6 +15,7 @@
 
 #include "flang/Frontend/CodeGenOptions.h"
 #include "flang/Frontend/FrontendOptions.h"
+#include "flang/Frontend/LoweringOptions.h"
 #include "flang/Frontend/PreprocessorOptions.h"
 #include "flang/Frontend/TargetOptions.h"
 #include "flang/Parser/parsing.h"
@@ -68,6 +69,9 @@
   // of options.
   Fortran::parser::Options parserOpts;
 
+  /// Options controlling lowering.
+  Fortran::frontend::LoweringOptions loweringOptions;
+
   /// Options controlling the target.
   Fortran::frontend::TargetOptions targetOpts;
 
@@ -136,6 +140,9 @@
   CodeGenOptions &getCodeGenOpts() { return codeGenOpts; }
   const CodeGenOptions &getCodeGenOpts() const { return codeGenOpts; }
 
+  LoweringOptions &getLoweringOptions() { return loweringOptions; }
+  const LoweringOptions &getLoweringOptions() const { return loweringOptions; }
+
   Fortran::semantics::SemanticsContext &getSemanticsContext() {
     return *semanticsContext;
   }
diff --git a/flang/include/flang/Frontend/LoweringOptions.h b/flang/include/flang/Frontend/LoweringOptions.h
new file
--- /dev/null
+++ b/flang/include/flang/Frontend/LoweringOptions.h
@@ -0,0 +1,36 @@
+//===- LoweringOptions.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Options controlling lowering of front-end fragments to the FIR dialect
+/// of MLIR
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef FLANG_FRONTEND_LOWERINGOPTIONS_H
+#define FLANG_FRONTEND_LOWERINGOPTIONS_H
+
+namespace Fortran::frontend {
+
+class LoweringOptions {
+  /// If true, lower transpose without a runtime call.
+  unsigned optimizeTranspose : 1;
+
+public:
+  LoweringOptions() : optimizeTranspose(true) {}
+
+  bool getOptimizeTranspose() const { return optimizeTranspose; }
+  LoweringOptions &setOptimizeTranspose(bool v) {
+    optimizeTranspose = v;
+    return *this;
+  }
+};
+
+} // namespace Fortran::frontend
+
+#endif // FLANG_FRONTEND_LOWERINGOPTIONS_H
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -14,6 +14,7 @@
 #define FORTRAN_LOWER_ABSTRACTCONVERTER_H
 
 #include "flang/Common/Fortran.h"
+#include "flang/Frontend/LoweringOptions.h"
 #include "flang/Lower/PFTDefs.h"
 #include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Semantics/symbol.h"
@@ -223,7 +224,22 @@
   /// Get the KindMap.
   virtual const fir::KindMapping &getKindMap() = 0;
 
+  AbstractConverter(const Fortran::frontend::LoweringOptions &loweringOptions)
+      : loweringOptions(loweringOptions) {}
   virtual ~AbstractConverter() = default;
+
+  //===--------------------------------------------------------------------===//
+  // Miscellaneous
+  //===--------------------------------------------------------------------===//
+
+  /// Return options controlling lowering behavior.
+  const Fortran::frontend::LoweringOptions &getLoweringOptions() const {
+    return loweringOptions;
+  }
+
+private:
+  /// Options controlling lowering behavior.
+  const Fortran::frontend::LoweringOptions &loweringOptions;
 };
 
 } // namespace lower
diff --git a/flang/include/flang/Lower/Bridge.h b/flang/include/flang/Lower/Bridge.h
--- a/flang/include/flang/Lower/Bridge.h
+++ b/flang/include/flang/Lower/Bridge.h
@@ -14,6 +14,7 @@
 #define FORTRAN_LOWER_BRIDGE_H
 
 #include "flang/Common/Fortran.h"
+#include "flang/Frontend/LoweringOptions.h"
 #include "flang/Lower/AbstractConverter.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Support/KindMapping.h"
@@ -52,9 +53,10 @@
       const Fortran::evaluate::IntrinsicProcTable &intrinsics,
       const Fortran::evaluate::TargetCharacteristics &targetCharacteristics,
       const Fortran::parser::AllCookedSources &allCooked,
-      llvm::StringRef triple, fir::KindMapping &kindMap) {
+      llvm::StringRef triple, fir::KindMapping &kindMap,
+      const Fortran::frontend::LoweringOptions &loweringOptions) {
     return LoweringBridge(ctx, defaultKinds, intrinsics, targetCharacteristics,
-                          allCooked, triple, kindMap);
+                          allCooked, triple, kindMap, loweringOptions);
   }
 
   //===--------------------------------------------------------------------===//
@@ -83,6 +85,10 @@
   /// Get the kind map.
   const fir::KindMapping &getKindMap() const { return kindMap; }
 
+  const Fortran::frontend::LoweringOptions &getLoweringOptions() const {
+    return loweringOptions;
+  }
+
   /// Create a folding context. Careful: this is very expensive.
   Fortran::evaluate::FoldingContext createFoldingContext() const;
 
@@ -107,7 +113,8 @@
       const Fortran::evaluate::IntrinsicProcTable &intrinsics,
       const Fortran::evaluate::TargetCharacteristics &targetCharacteristics,
       const Fortran::parser::AllCookedSources &cooked, llvm::StringRef triple,
-      fir::KindMapping &kindMap);
+      fir::KindMapping &kindMap,
+      const Fortran::frontend::LoweringOptions &loweringOptions);
 
   LoweringBridge() = delete;
   LoweringBridge(const LoweringBridge &) = delete;
@@ -118,6 +125,7 @@
   mlir::MLIRContext &context;
   std::unique_ptr<mlir::ModuleOp> module;
   fir::KindMapping &kindMap;
+  const Fortran::frontend::LoweringOptions &loweringOptions;
 };
 
 } // namespace lower
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -586,6 +586,17 @@
   return diags.getNumErrors() == numErrorsBefore;
 }
 
+/// Set \p loweringOptions controlling lowering behavior based
+/// on the \p optimizationLevel.
+static bool
+setLoweringOptions(Fortran::frontend::LoweringOptions &loweringOptions,
+                   unsigned optimizationLevel) {
+  // Lower TRANSPOSE as a runtime call under -O0.
+  loweringOptions.setOptimizeTranspose(optimizationLevel > 0);
+
+  return true;
+}
+
 bool CompilerInvocation::createFromArgs(
     CompilerInvocation &res, llvm::ArrayRef<const char *> commandLineArgs,
     clang::DiagnosticsEngine &diags) {
@@ -640,6 +651,9 @@
   res.frontendOpts.mlirArgs =
       args.getAllArgValues(clang::driver::options::OPT_mmlir);
 
+  success &= setLoweringOptions(res.getLoweringOptions(),
+                                res.getCodeGenOpts().OptimizationLevel);
+
   return success;
 }
 
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -148,7 +148,7 @@
       *mlirCtx, defKinds, ci.getInvocation().getSemanticsContext().intrinsics(),
       ci.getInvocation().getSemanticsContext().targetCharacteristics(),
       ci.getParsing().allCooked(), ci.getInvocation().getTargetOpts().triple,
-      kindMap);
+      kindMap, ci.getInvocation().getLoweringOptions());
 
   // Create a parse tree and lower it to FIR
   Fortran::parser::Program &parseTree{*ci.getParsing().parseTree()};
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -179,7 +179,8 @@
 class FirConverter : public Fortran::lower::AbstractConverter {
 public:
   explicit FirConverter(Fortran::lower::LoweringBridge &bridge)
-      : bridge{bridge}, foldingContext{bridge.createFoldingContext()} {}
+      : Fortran::lower::AbstractConverter(bridge.getLoweringOptions()),
+        bridge{bridge}, foldingContext{bridge.createFoldingContext()} {}
   virtual ~FirConverter() = default;
 
   /// Convert the PFT to FIR.
@@ -3217,10 +3218,11 @@
     const Fortran::evaluate::IntrinsicProcTable &intrinsics,
     const Fortran::evaluate::TargetCharacteristics &targetCharacteristics,
     const Fortran::parser::AllCookedSources &cooked, llvm::StringRef triple,
-    fir::KindMapping &kindMap)
+    fir::KindMapping &kindMap,
+    const Fortran::frontend::LoweringOptions &loweringOptions)
     : defaultKinds{defaultKinds}, intrinsics{intrinsics},
       targetCharacteristics{targetCharacteristics}, cooked{&cooked},
-      context{context}, kindMap{kindMap} {
+      context{context}, kindMap{kindMap}, loweringOptions{loweringOptions} {
   // Register the diagnostic handler.
   context.getDiagEngine().registerHandler([](mlir::Diagnostic &diag) {
     llvm::raw_ostream &os = llvm::errs();
diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp
--- a/flang/lib/Lower/ConvertExpr.cpp
+++ b/flang/lib/Lower/ConvertExpr.cpp
@@ -88,12 +88,16 @@
 // from the "inline" FIR, e.g. it may diagnose out-of-memory conditions
 // during the temporary allocation whereas the inline implementation
 // relies on AllocMemOp that will silently return null in case
-// there is not enough memory. So it may be a good idea to set
-// this option to false for -O0.
+// there is not enough memory.
+//
+// If it is set to false, then TRANSPOSE will be lowered using
+// a runtime call. If it is set to true, then the lowering is controlled
+// by LoweringOptions::optimizeTranspose bit (see isTransposeOptEnabled
+// function in this file).
 static llvm::cl::opt<bool> optimizeTranspose(
     "opt-transpose",
     llvm::cl::desc("lower transpose without using a runtime call"),
-    llvm::cl::init(true));
+    llvm::cl::init(true), llvm::cl::Hidden);
 
 /// The various semantics of a program constituent (or a part thereof) as it
 /// may appear in an expression.
@@ -595,36 +599,50 @@
          module->name().ToString().find("omp_lib") == std::string::npos;
 }
 
+// Return true if TRANSPOSE should be lowered without a runtime call.
+static bool
+isTransposeOptEnabled(const Fortran::lower::AbstractConverter &converter) {
+  return optimizeTranspose &&
+         converter.getLoweringOptions().getOptimizeTranspose();
+}
+
 // A set of visitors to detect if the given expression
 // is a TRANSPOSE call that should be lowered without using
 // runtime TRANSPOSE implementation.
 template <typename T>
-static bool isOptimizableTranspose(const T &) {
+static bool isOptimizableTranspose(const T &,
+                                   const Fortran::lower::AbstractConverter &) {
   return false;
 }
 
 static bool
-isOptimizableTranspose(const Fortran::evaluate::ProcedureRef &procRef) {
+isOptimizableTranspose(const Fortran::evaluate::ProcedureRef &procRef,
+                       const Fortran::lower::AbstractConverter &converter) {
   const Fortran::evaluate::SpecificIntrinsic *intrin =
       procRef.proc().GetSpecificIntrinsic();
-  return optimizeTranspose && intrin && intrin->name == "transpose";
+  return isTransposeOptEnabled(converter) && intrin &&
+         intrin->name == "transpose";
 }
 
 template <typename T>
 static bool
-isOptimizableTranspose(const Fortran::evaluate::FunctionRef<T> &funcRef) {
+isOptimizableTranspose(const Fortran::evaluate::FunctionRef<T> &funcRef,
+                       const Fortran::lower::AbstractConverter &converter) {
   return isOptimizableTranspose(
-      static_cast<const Fortran::evaluate::ProcedureRef &>(funcRef));
+      static_cast<const Fortran::evaluate::ProcedureRef &>(funcRef), converter);
 }
 
 template <typename T>
-static bool isOptimizableTranspose(Fortran::evaluate::Expr<T> expr) {
+static bool
+isOptimizableTranspose(Fortran::evaluate::Expr<T> expr,
+                       const Fortran::lower::AbstractConverter &converter) {
   // If optimizeTranspose is not enabled, return false right away.
-  if (!optimizeTranspose)
+  if (!isTransposeOptEnabled(converter))
     return false;
 
-  return std::visit([&](const auto &e) { return isOptimizableTranspose(e); },
-                    expr.u);
+  return std::visit(
+      [&](const auto &e) { return isOptimizableTranspose(e, converter); },
+      expr.u);
 }
 
 namespace {
@@ -3289,7 +3307,7 @@
     // is used to not create a new temporary storage.
     if (isScalar(x) ||
         Fortran::evaluate::UnwrapWholeSymbolOrComponentDataRef(x) ||
-        (isTransformationalRef(x) && !isOptimizableTranspose(x)))
+        (isTransformationalRef(x) && !isOptimizableTranspose(x, converter)))
       return std::visit([&](const auto &e) { return genref(e); }, x.u);
     if (useBoxArg)
       return asArrayArg(x);
@@ -5139,7 +5157,7 @@
                              llvm::Optional<mlir::Type> retTy) {
     mlir::Location loc = getLoc();
 
-    if (isOptimizableTranspose(procRef))
+    if (isOptimizableTranspose(procRef, converter))
       return genTransposeProcRef(procRef);
 
     if (procRef.IsElemental()) {
diff --git a/flang/test/Lower/Intrinsics/transpose.f90 b/flang/test/Lower/Intrinsics/transpose.f90
--- a/flang/test/Lower/Intrinsics/transpose.f90
+++ b/flang/test/Lower/Intrinsics/transpose.f90
@@ -1,4 +1,5 @@
 ! RUN: bbc -emit-fir %s -opt-transpose=false -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -O0 %s -o - | FileCheck %s
 
 ! CHECK-LABEL: func @_QPtranspose_test(
 ! CHECK-SAME: %[[source:.*]]: !fir.ref<!fir.array<2x3xf32>>{{.*}}) {
diff --git a/flang/test/Lower/Intrinsics/transpose_opt.f90 b/flang/test/Lower/Intrinsics/transpose_opt.f90
--- a/flang/test/Lower/Intrinsics/transpose_opt.f90
+++ b/flang/test/Lower/Intrinsics/transpose_opt.f90
@@ -1,37 +1,31 @@
 ! RUN: bbc -emit-fir %s -opt-transpose=true -o - | FileCheck %s
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -O1 %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -O2 %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -O3 %s -o - | FileCheck %s
 
 ! CHECK-LABEL: func.func @_QPtranspose_test(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<2x3xf32>> {fir.bindc_name = "mat"}) {
 subroutine transpose_test(mat)
   real :: mat(2,3)
   call bar_transpose_test(transpose(mat))
-! CHECK: %[[VAL_1:.*]] = arith.constant 2 : index
-! CHECK: %[[VAL_2:.*]] = arith.constant 3 : index
-! CHECK: %[[VAL_3:.*]] = arith.constant 3 : index
-! CHECK: %[[VAL_4:.*]] = arith.constant 2 : index
-! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_1]], %[[VAL_2]] : (index, index) -> !fir.shape<2>
-! CHECK: %[[VAL_6:.*]] = fir.array_load %[[VAL_0]](%[[VAL_5]]) : (!fir.ref<!fir.array<2x3xf32>>, !fir.shape<2>) -> !fir.array<2x3xf32>
+! CHECK: %[[VAL_6:.*]] = fir.array_load %[[VAL_0]](%{{.*}}) : (!fir.ref<!fir.array<2x3xf32>>, !fir.shape<2>) -> !fir.array<2x3xf32>
 ! CHECK: %[[VAL_7:.*]] = fir.allocmem !fir.array<3x2xf32>
-! CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_3]], %[[VAL_4]] : (index, index) -> !fir.shape<2>
-! CHECK: %[[VAL_9:.*]] = fir.array_load %[[VAL_7]](%[[VAL_8]]) : (!fir.heap<!fir.array<3x2xf32>>, !fir.shape<2>) -> !fir.array<3x2xf32>
-! CHECK: %[[VAL_10:.*]] = arith.constant 1 : index
-! CHECK: %[[VAL_11:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_3]], %[[VAL_10]] : index
-! CHECK: %[[VAL_13:.*]] = arith.subi %[[VAL_4]], %[[VAL_10]] : index
-! CHECK: %[[VAL_14:.*]] = fir.do_loop %[[VAL_15:.*]] = %[[VAL_11]] to %[[VAL_13]] step %[[VAL_10]] unordered iter_args(%[[VAL_16:.*]] = %[[VAL_9]]) -> (!fir.array<3x2xf32>) {
-! CHECK: %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_10]] unordered iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (!fir.array<3x2xf32>) {
+! CHECK: %[[VAL_9:.*]] = fir.array_load %[[VAL_7]](%{{.*}}) : (!fir.heap<!fir.array<3x2xf32>>, !fir.shape<2>) -> !fir.array<3x2xf32>
+! CHECK: %[[VAL_14:.*]] = fir.do_loop %[[VAL_15:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_16:.*]] = %[[VAL_9]]) -> (!fir.array<3x2xf32>) {
+! CHECK: %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (!fir.array<3x2xf32>) {
 ! CHECK: %[[VAL_20:.*]] = fir.array_fetch %[[VAL_6]], %[[VAL_15]], %[[VAL_18]] : (!fir.array<2x3xf32>, index, index) -> f32
 ! CHECK: %[[VAL_21:.*]] = fir.array_update %[[VAL_19]], %[[VAL_20]], %[[VAL_18]], %[[VAL_15]] : (!fir.array<3x2xf32>, f32, index, index) -> !fir.array<3x2xf32>
 ! CHECK: fir.result %[[VAL_21]] : !fir.array<3x2xf32>
 ! CHECK: }
-! CHECK: fir.result %[[VAL_22:.*]] : !fir.array<3x2xf32>
+! CHECK: fir.result %[[VAL_17]] : !fir.array<3x2xf32>
 ! CHECK: }
-! CHECK: fir.array_merge_store %[[VAL_9]], %[[VAL_23:.*]] to %[[VAL_7]] : !fir.array<3x2xf32>, !fir.array<3x2xf32>, !fir.heap<!fir.array<3x2xf32>>
+! CHECK: fir.array_merge_store %[[VAL_9]], %[[VAL_14]] to %[[VAL_7]] : !fir.array<3x2xf32>, !fir.array<3x2xf32>, !fir.heap<!fir.array<3x2xf32>>
 ! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_7]] : (!fir.heap<!fir.array<3x2xf32>>) -> !fir.ref<!fir.array<3x2xf32>>
 ! CHECK: fir.call @_QPbar_transpose_test(%[[VAL_24]]) : (!fir.ref<!fir.array<3x2xf32>>) -> ()
 ! CHECK: fir.freemem %[[VAL_7]] : !fir.heap<!fir.array<3x2xf32>>
-! CHECK: return
-! CHECK: }
+
+! CHECK-NOT: @_FortranATranspose
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPtranspose_allocatable_test(
@@ -39,96 +33,66 @@
 subroutine transpose_allocatable_test(mat)
   real, allocatable :: mat(:,:)
   mat = transpose(mat)
 
+! Verify that the "optimized" TRANSPOSE loops are generated
+! three times in each branch checking the status of LHS allocatable.
+
 ! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
-! CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_2]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>, index) -> (index, index, index)
-! CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
-! CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>, index) -> (index, index, index)
 ! CHECK: %[[VAL_6:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>) -> !fir.heap<!fir.array<?x?xf32>>
-! CHECK: %[[VAL_7:.*]] = fir.shape_shift %[[VAL_3]]#0, %[[VAL_3]]#1, %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index, index, index) -> !fir.shapeshift<2>
-! CHECK: %[[VAL_8:.*]] = fir.array_load %[[VAL_6]](%[[VAL_7]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shapeshift<2>) -> !fir.array<?x?xf32>
+! CHECK: %[[VAL_8:.*]] = fir.array_load %[[VAL_6]](%{{.*}}) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shapeshift<2>) -> !fir.array<?x?xf32>
+
 ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
 ! CHECK: %[[VAL_10:.*]] = fir.box_addr %[[VAL_9]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>) -> !fir.heap<!fir.array<?x?xf32>>
-! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (!fir.heap<!fir.array<?x?xf32>>) -> i64
-! CHECK: %[[VAL_12:.*]] = arith.constant 0 : i64
-! CHECK: %[[VAL_13:.*]] = arith.cmpi ne, %[[VAL_11]], %[[VAL_12]] : i64
-! CHECK: %[[VAL_14:.*]]:2 = fir.if %[[VAL_13]] -> (i1, !fir.heap<!fir.array<?x?xf32>>) {
-! CHECK: %[[VAL_15:.*]] = arith.constant false
-! CHECK: %[[VAL_16:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_9]], %[[VAL_16]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>, index) -> (index, index, index)
-! CHECK: %[[VAL_18:.*]] = arith.constant 1 : index
-! CHECK: %[[VAL_19:.*]]:3 = fir.box_dims %[[VAL_9]], %[[VAL_18]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>, index) -> (index, index, index)
-! CHECK: %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_17]]#1, %[[VAL_5]]#1 : index
-! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_20]], %[[VAL_15]] : i1
-! CHECK: %[[VAL_22:.*]] = arith.cmpi ne, %[[VAL_19]]#1, %[[VAL_3]]#1 : index
-! CHECK: %[[VAL_23:.*]] = arith.select %[[VAL_22]], %[[VAL_22]], %[[VAL_21]] : i1
-! CHECK: %[[VAL_24:.*]] = fir.if %[[VAL_23]] -> (!fir.heap<!fir.array<?x?xf32>>) {
-! CHECK: %[[VAL_25:.*]] = fir.allocmem !fir.array<?x?xf32>, %[[VAL_5]]#1, %[[VAL_3]]#1 {uniq_name = ".auto.alloc"}
-! CHECK: %[[VAL_26:.*]] = fir.shape %[[VAL_5]]#1, %[[VAL_3]]#1 : (index, index) -> !fir.shape<2>
-! CHECK: %[[VAL_27:.*]] = fir.array_load %[[VAL_25]](%[[VAL_26]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
-! CHECK: %[[VAL_28:.*]] = arith.constant 1 : index
-! CHECK: %[[VAL_29:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_30:.*]] = arith.subi %[[VAL_5]]#1, %[[VAL_28]] : index
-! CHECK: %[[VAL_31:.*]] = arith.subi %[[VAL_3]]#1, %[[VAL_28]] : index
-! CHECK: %[[VAL_32:.*]] = fir.do_loop %[[VAL_33:.*]] = %[[VAL_29]] to %[[VAL_31]] step %[[VAL_28]] unordered iter_args(%[[VAL_34:.*]] = %[[VAL_27]]) -> (!fir.array<?x?xf32>) {
-! CHECK: %[[VAL_35:.*]] = fir.do_loop %[[VAL_36:.*]] = %[[VAL_29]] to %[[VAL_30]] step %[[VAL_28]] unordered iter_args(%[[VAL_37:.*]] = %[[VAL_34]]) -> (!fir.array<?x?xf32>) {
+
+! CHECK: %[[VAL_14:.*]]:2 = fir.if %{{.*}} -> (i1, !fir.heap<!fir.array<?x?xf32>>) {
+
+! CHECK: %[[VAL_24:.*]] = fir.if %{{.*}} -> (!fir.heap<!fir.array<?x?xf32>>) {
+
+! CHECK: %[[VAL_25:.*]] = fir.allocmem !fir.array<?x?xf32>
+! CHECK: %[[VAL_27:.*]] = fir.array_load %[[VAL_25]](%{{.*}}) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
+
+! CHECK: %[[VAL_32:.*]] = fir.do_loop %[[VAL_33:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_34:.*]] = %[[VAL_27]]) -> (!fir.array<?x?xf32>) {
+! CHECK: %[[VAL_35:.*]] = fir.do_loop %[[VAL_36:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_37:.*]] = %[[VAL_34]]) -> (!fir.array<?x?xf32>) {
 ! CHECK: %[[VAL_38:.*]] = fir.array_fetch %[[VAL_8]], %[[VAL_33]], %[[VAL_36]] : (!fir.array<?x?xf32>, index, index) -> f32
 ! CHECK: %[[VAL_39:.*]] = fir.array_update %[[VAL_37]], %[[VAL_38]], %[[VAL_36]], %[[VAL_33]] : (!fir.array<?x?xf32>, f32, index, index) -> !fir.array<?x?xf32>
 ! CHECK: fir.result %[[VAL_39]] : !fir.array<?x?xf32>
 ! CHECK: }
-! CHECK: fir.result %[[VAL_40:.*]] : !fir.array<?x?xf32>
+! CHECK: fir.result %[[VAL_35]] : !fir.array<?x?xf32>
 ! CHECK: }
-! CHECK: fir.array_merge_store %[[VAL_27]], %[[VAL_41:.*]] to %[[VAL_25]] : !fir.array<?x?xf32>, !fir.array<?x?xf32>, !fir.heap<!fir.array<?x?xf32>>
-! CHECK: fir.result %[[VAL_25]] : !fir.heap<!fir.array<?x?xf32>>
+! CHECK: fir.array_merge_store %[[VAL_27]], %[[VAL_32]] to %[[VAL_25]] : !fir.array<?x?xf32>, !fir.array<?x?xf32>, !fir.heap<!fir.array<?x?xf32>>
+
 ! CHECK: } else {
-! CHECK: %[[VAL_42:.*]] = fir.shape %[[VAL_5]]#1, %[[VAL_3]]#1 : (index, index) -> !fir.shape<2>
-! CHECK: %[[VAL_43:.*]] = fir.array_load %[[VAL_10]](%[[VAL_42]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
-! CHECK: %[[VAL_44:.*]] = arith.constant 1 : index
-! CHECK: %[[VAL_45:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_46:.*]] = arith.subi %[[VAL_5]]#1, %[[VAL_44]] : index
-! CHECK: %[[VAL_47:.*]] = arith.subi %[[VAL_3]]#1, %[[VAL_44]] : index
-! CHECK: %[[VAL_48:.*]] = fir.do_loop %[[VAL_49:.*]] = %[[VAL_45]] to %[[VAL_47]] step %[[VAL_44]] unordered iter_args(%[[VAL_50:.*]] = %[[VAL_43]]) -> (!fir.array<?x?xf32>) {
-! CHECK: %[[VAL_51:.*]] = fir.do_loop %[[VAL_52:.*]] = %[[VAL_45]] to %[[VAL_46]] step %[[VAL_44]] unordered iter_args(%[[VAL_53:.*]] = %[[VAL_50]]) -> (!fir.array<?x?xf32>) {
+
+! CHECK: %[[VAL_43:.*]] = fir.array_load %[[VAL_10]](%{{.*}}) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
+
+! CHECK: %[[VAL_48:.*]] = fir.do_loop %[[VAL_49:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_50:.*]] = %[[VAL_43]]) -> (!fir.array<?x?xf32>) {
+! CHECK: %[[VAL_51:.*]] = fir.do_loop %[[VAL_52:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_53:.*]] = %[[VAL_50]]) -> (!fir.array<?x?xf32>) {
 ! CHECK: %[[VAL_54:.*]] = fir.array_fetch %[[VAL_8]], %[[VAL_49]], %[[VAL_52]] : (!fir.array<?x?xf32>, index, index) -> f32
 ! CHECK: %[[VAL_55:.*]] = fir.array_update %[[VAL_53]], %[[VAL_54]], %[[VAL_52]], %[[VAL_49]] : (!fir.array<?x?xf32>, f32, index, index) -> !fir.array<?x?xf32>
 ! CHECK: fir.result %[[VAL_55]] : !fir.array<?x?xf32>
 ! CHECK: }
-! CHECK: fir.result %[[VAL_56:.*]] : !fir.array<?x?xf32>
+! CHECK: fir.result %[[VAL_51]] : !fir.array<?x?xf32>
 ! CHECK: }
-! CHECK: fir.array_merge_store %[[VAL_43]], %[[VAL_57:.*]] to %[[VAL_10]] : !fir.array<?x?xf32>, !fir.array<?x?xf32>, !fir.heap<!fir.array<?x?xf32>>
+! CHECK: fir.array_merge_store %[[VAL_43]], %[[VAL_48]] to %[[VAL_10]] : !fir.array<?x?xf32>, !fir.array<?x?xf32>, !fir.heap<!fir.array<?x?xf32>>
+
 ! CHECK: fir.result %[[VAL_10]] : !fir.heap<!fir.array<?x?xf32>>
 ! CHECK: }
-! CHECK: fir.result %[[VAL_23]], %[[VAL_58:.*]] : i1, !fir.heap<!fir.array<?x?xf32>>
+
 ! CHECK: } else {
-! CHECK: %[[VAL_59:.*]] = arith.constant true
-! CHECK: %[[VAL_60:.*]] = fir.allocmem !fir.array<?x?xf32>, %[[VAL_5]]#1, %[[VAL_3]]#1 {uniq_name = ".auto.alloc"}
-! CHECK: %[[VAL_61:.*]] = fir.shape %[[VAL_5]]#1, %[[VAL_3]]#1 : (index, index) -> !fir.shape<2>
-! CHECK: %[[VAL_62:.*]] = fir.array_load %[[VAL_60]](%[[VAL_61]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
-! CHECK: %[[VAL_63:.*]] = arith.constant 1 : index
-! CHECK: %[[VAL_64:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_65:.*]] = arith.subi %[[VAL_5]]#1, %[[VAL_63]] : index
-! CHECK: %[[VAL_66:.*]] = arith.subi %[[VAL_3]]#1, %[[VAL_63]] : index
-! CHECK: %[[VAL_67:.*]] = fir.do_loop %[[VAL_68:.*]] = %[[VAL_64]] to %[[VAL_66]] step %[[VAL_63]] unordered iter_args(%[[VAL_69:.*]] = %[[VAL_62]]) -> (!fir.array<?x?xf32>) {
-! CHECK: %[[VAL_70:.*]] = fir.do_loop %[[VAL_71:.*]] = %[[VAL_64]] to %[[VAL_65]] step %[[VAL_63]] unordered iter_args(%[[VAL_72:.*]] = %[[VAL_69]]) -> (!fir.array<?x?xf32>) {
+
+! CHECK: %[[VAL_60:.*]] = fir.allocmem !fir.array<?x?xf32>
+! CHECK: %[[VAL_62:.*]] = fir.array_load %[[VAL_60]](%{{.*}}) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
+
+! CHECK: %[[VAL_67:.*]] = fir.do_loop %[[VAL_68:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_69:.*]] = %[[VAL_62]]) -> (!fir.array<?x?xf32>) {
+! CHECK: %[[VAL_70:.*]] = fir.do_loop %[[VAL_71:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_72:.*]] = %[[VAL_69]]) -> (!fir.array<?x?xf32>) {
 ! CHECK: %[[VAL_73:.*]] = fir.array_fetch %[[VAL_8]], %[[VAL_68]], %[[VAL_71]] : (!fir.array<?x?xf32>, index, index) -> f32
 ! CHECK: %[[VAL_74:.*]] = fir.array_update %[[VAL_72]], %[[VAL_73]], %[[VAL_71]], %[[VAL_68]] : (!fir.array<?x?xf32>, f32, index, index) -> !fir.array<?x?xf32>
 ! CHECK: fir.result %[[VAL_74]] : !fir.array<?x?xf32>
 ! CHECK: }
-! CHECK: fir.result %[[VAL_75:.*]] : !fir.array<?x?xf32>
+! CHECK: fir.result %[[VAL_70]] : !fir.array<?x?xf32>
 ! CHECK: }
-! CHECK: fir.array_merge_store %[[VAL_62]], %[[VAL_76:.*]] to %[[VAL_60]] : !fir.array<?x?xf32>, !fir.array<?x?xf32>, !fir.heap<!fir.array<?x?xf32>>
-! CHECK: fir.result %[[VAL_59]], %[[VAL_60]] : i1, !fir.heap<!fir.array<?x?xf32>>
-! CHECK: }
-! CHECK: fir.if %[[VAL_77:.*]]#0 {
-! CHECK: fir.if %[[VAL_13]] {
-! CHECK: fir.freemem %[[VAL_10]] : !fir.heap<!fir.array<?x?xf32>>
-! CHECK: }
-! CHECK: %[[VAL_78:.*]] = fir.shape %[[VAL_5]]#1, %[[VAL_3]]#1 : (index, index) -> !fir.shape<2>
-! CHECK: %[[VAL_79:.*]] = fir.embox %[[VAL_77]]#1(%[[VAL_78]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.box<!fir.heap<!fir.array<?x?xf32>>>
-! CHECK: fir.store %[[VAL_79]] to %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+! CHECK: fir.array_merge_store %[[VAL_62]], %[[VAL_67]] to %[[VAL_60]] : !fir.array<?x?xf32>, !fir.array<?x?xf32>, !fir.heap<!fir.array<?x?xf32>>
+
 ! CHECK: }
-! CHECK: return
-! CHECK: }
-end subroutine
-! CHECK: func.func private @_QPbar_transpose_test(!fir.ref<!fir.array<3x2xf32>>)
+! CHECK-NOT: @_FortranATranspose
+end subroutine
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -215,10 +215,12 @@
   auto &defKinds = semanticsContext.defaultKinds();
   fir::KindMapping kindMap(
      &ctx, llvm::ArrayRef<fir::KindTy>{fir::fromDefaultKinds(defKinds)});
+  // Use default lowering options for bbc.
+  Fortran::frontend::LoweringOptions loweringOptions{};
   auto burnside = Fortran::lower::LoweringBridge::create(
      ctx, defKinds, semanticsContext.intrinsics(),
      semanticsContext.targetCharacteristics(), parsing.allCooked(), "",
-      kindMap);
+      kindMap, loweringOptions);
   burnside.lower(parseTree, semanticsContext);
   mlir::ModuleOp mlirModule = burnside.getModule();
   std::error_code ec;
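Note for readers trying the new option outside this patch: the sketch below is illustrative only and is not part of the diff. It shows how a driver-like caller could derive LoweringOptions from an optimization level before passing it as the new trailing argument of LoweringBridge::create, mirroring setLoweringOptions() in CompilerInvocation.cpp and the default-constructed options in bbc.cpp above. The helper name makeLoweringOptions and its optLevel parameter are hypothetical; only LoweringOptions and setOptimizeTranspose come from the change.

// Illustrative sketch, not part of the patch. makeLoweringOptions is a
// hypothetical helper; LoweringOptions/setOptimizeTranspose are from the
// new flang/Frontend/LoweringOptions.h added above.
#include "flang/Frontend/LoweringOptions.h"

static Fortran::frontend::LoweringOptions makeLoweringOptions(unsigned optLevel) {
  // Default-constructed options keep optimizeTranspose enabled, which is
  // why bbc gets the inline TRANSPOSE lowering without any extra flags.
  Fortran::frontend::LoweringOptions opts{};
  // Match setLoweringOptions() in CompilerInvocation.cpp: keep the runtime
  // TRANSPOSE call at -O0 (better out-of-memory diagnostics), lower it
  // inline at -O1 and above.
  opts.setOptimizeTranspose(optLevel > 0);
  return opts;
}

The resulting object would then be threaded into lowering the same way FrontendActions.cpp does with ci.getInvocation().getLoweringOptions().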