diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h
--- a/flang/include/flang/Frontend/CompilerInvocation.h
+++ b/flang/include/flang/Frontend/CompilerInvocation.h
@@ -15,6 +15,7 @@
 
 #include "flang/Frontend/CodeGenOptions.h"
 #include "flang/Frontend/FrontendOptions.h"
+#include "flang/Frontend/LoweringOptions.h"
 #include "flang/Frontend/PreprocessorOptions.h"
 #include "flang/Frontend/TargetOptions.h"
 #include "flang/Parser/parsing.h"
@@ -68,6 +69,9 @@
   // of options.
   Fortran::parser::Options parserOpts;
 
+  /// Options controlling lowering.
+  Fortran::frontend::LoweringOptions loweringOptions;
+
   /// Options controlling the target.
   Fortran::frontend::TargetOptions targetOpts;
 
@@ -136,6 +140,9 @@
   CodeGenOptions &getCodeGenOpts() { return codeGenOpts; }
   const CodeGenOptions &getCodeGenOpts() const { return codeGenOpts; }
 
+  LoweringOptions &getLoweringOptions() { return loweringOptions; }
+  const LoweringOptions &getLoweringOptions() const { return loweringOptions; }
+
   Fortran::semantics::SemanticsContext &getSemanticsContext() {
     return *semanticsContext;
   }
diff --git a/flang/include/flang/Frontend/LoweringOptions.h b/flang/include/flang/Frontend/LoweringOptions.h
new file
--- /dev/null
+++ b/flang/include/flang/Frontend/LoweringOptions.h
@@ -0,0 +1,36 @@
+//===- LoweringOptions.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Options controlling lowering of front-end fragments to the FIR dialect
+/// of MLIR
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef FLANG_FRONTEND_LOWERINGOPTIONS_H
+#define FLANG_FRONTEND_LOWERINGOPTIONS_H
+
+namespace Fortran::frontend {
+
+class LoweringOptions {
+  /// If true, lower transpose without a runtime call.
+  unsigned optimizeTranspose : 1;
+
+public:
+  LoweringOptions() : optimizeTranspose(true) {}
+
+  bool getOptimizeTranspose() const { return optimizeTranspose; }
+  LoweringOptions &setOptimizeTranspose(bool v) {
+    optimizeTranspose = v;
+    return *this;
+  }
+};
+
+} // namespace Fortran::frontend
+
+#endif // FLANG_FRONTEND_LOWERINGOPTIONS_H
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -14,6 +14,7 @@
 #define FORTRAN_LOWER_ABSTRACTCONVERTER_H
 
 #include "flang/Common/Fortran.h"
+#include "flang/Frontend/LoweringOptions.h"
 #include "flang/Lower/PFTDefs.h"
 #include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Semantics/symbol.h"
@@ -223,7 +224,22 @@
   /// Get the KindMap.
   virtual const fir::KindMapping &getKindMap() = 0;
 
+  AbstractConverter(const Fortran::frontend::LoweringOptions &loweringOptions)
+      : loweringOptions(loweringOptions) {}
   virtual ~AbstractConverter() = default;
+
+  //===--------------------------------------------------------------------===//
+  // Miscellaneous
+  //===--------------------------------------------------------------------===//
+
+  /// Return options controlling lowering behavior.
+  const Fortran::frontend::LoweringOptions &getLoweringOptions() const {
+    return loweringOptions;
+  }
+
+private:
+  /// Options controlling lowering behavior.
+  const Fortran::frontend::LoweringOptions &loweringOptions;
 };
 
 } // namespace lower
diff --git a/flang/include/flang/Lower/Bridge.h b/flang/include/flang/Lower/Bridge.h
--- a/flang/include/flang/Lower/Bridge.h
+++ b/flang/include/flang/Lower/Bridge.h
@@ -14,6 +14,7 @@
 #define FORTRAN_LOWER_BRIDGE_H
 
 #include "flang/Common/Fortran.h"
+#include "flang/Frontend/LoweringOptions.h"
 #include "flang/Lower/AbstractConverter.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Support/KindMapping.h"
@@ -52,9 +53,10 @@
       const Fortran::evaluate::IntrinsicProcTable &intrinsics,
       const Fortran::evaluate::TargetCharacteristics &targetCharacteristics,
       const Fortran::parser::AllCookedSources &allCooked,
-      llvm::StringRef triple, fir::KindMapping &kindMap) {
+      llvm::StringRef triple, fir::KindMapping &kindMap,
+      const Fortran::frontend::LoweringOptions &loweringOptions) {
     return LoweringBridge(ctx, defaultKinds, intrinsics, targetCharacteristics,
-                          allCooked, triple, kindMap);
+                          allCooked, triple, kindMap, loweringOptions);
   }
 
   //===--------------------------------------------------------------------===//
@@ -83,6 +85,10 @@
   /// Get the kind map.
   const fir::KindMapping &getKindMap() const { return kindMap; }
 
+  const Fortran::frontend::LoweringOptions &getLoweringOptions() const {
+    return loweringOptions;
+  }
+
   /// Create a folding context. Careful: this is very expensive.
   Fortran::evaluate::FoldingContext createFoldingContext() const;
 
@@ -107,7 +113,8 @@
       const Fortran::evaluate::IntrinsicProcTable &intrinsics,
       const Fortran::evaluate::TargetCharacteristics &targetCharacteristics,
       const Fortran::parser::AllCookedSources &cooked, llvm::StringRef triple,
-      fir::KindMapping &kindMap);
+      fir::KindMapping &kindMap,
+      const Fortran::frontend::LoweringOptions &loweringOptions);
 
   LoweringBridge() = delete;
   LoweringBridge(const LoweringBridge &) = delete;
@@ -118,6 +125,7 @@
   mlir::MLIRContext &context;
   std::unique_ptr<mlir::ModuleOp> module;
   fir::KindMapping &kindMap;
+  const Fortran::frontend::LoweringOptions &loweringOptions;
 };
 
 } // namespace lower
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -586,6 +586,17 @@
   return diags.getNumErrors() == numErrorsBefore;
 }
 
+/// Set \p loweringOptions controlling lowering behavior based
+/// on the \p optimizationLevel.
+static bool
+setLoweringOptions(Fortran::frontend::LoweringOptions &loweringOptions,
+                   unsigned optimizationLevel) {
+  // Lower TRANSPOSE as a runtime call under -O0.
+  loweringOptions.setOptimizeTranspose(optimizationLevel > 0);
+
+  return true;
+}
+
 bool CompilerInvocation::createFromArgs(
     CompilerInvocation &res, llvm::ArrayRef<const char *> commandLineArgs,
     clang::DiagnosticsEngine &diags) {
@@ -640,6 +651,9 @@
   res.frontendOpts.mlirArgs =
       args.getAllArgValues(clang::driver::options::OPT_mmlir);
 
+  success &= setLoweringOptions(res.getLoweringOptions(),
+                                res.getCodeGenOpts().OptimizationLevel);
+
   return success;
 }
 
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -148,7 +148,7 @@
       *mlirCtx, defKinds, ci.getInvocation().getSemanticsContext().intrinsics(),
       ci.getInvocation().getSemanticsContext().targetCharacteristics(),
       ci.getParsing().allCooked(), ci.getInvocation().getTargetOpts().triple,
-      kindMap);
+      kindMap, ci.getInvocation().getLoweringOptions());
 
   // Create a parse tree and lower it to FIR
   Fortran::parser::Program &parseTree{*ci.getParsing().parseTree()};
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -179,7 +179,8 @@
 class FirConverter : public Fortran::lower::AbstractConverter {
 public:
   explicit FirConverter(Fortran::lower::LoweringBridge &bridge)
-      : bridge{bridge}, foldingContext{bridge.createFoldingContext()} {}
+      : Fortran::lower::AbstractConverter(bridge.getLoweringOptions()),
+        bridge{bridge}, foldingContext{bridge.createFoldingContext()} {}
   virtual ~FirConverter() = default;
 
   /// Convert the PFT to FIR.
@@ -3217,10 +3218,11 @@
     const Fortran::evaluate::IntrinsicProcTable &intrinsics,
     const Fortran::evaluate::TargetCharacteristics &targetCharacteristics,
     const Fortran::parser::AllCookedSources &cooked, llvm::StringRef triple,
-    fir::KindMapping &kindMap)
+    fir::KindMapping &kindMap,
+    const Fortran::frontend::LoweringOptions &loweringOptions)
     : defaultKinds{defaultKinds}, intrinsics{intrinsics},
       targetCharacteristics{targetCharacteristics}, cooked{&cooked},
-      context{context}, kindMap{kindMap} {
+      context{context}, kindMap{kindMap}, loweringOptions{loweringOptions} {
   // Register the diagnostic handler.
   context.getDiagEngine().registerHandler([](mlir::Diagnostic &diag) {
     llvm::raw_ostream &os = llvm::errs();
diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp
--- a/flang/lib/Lower/ConvertExpr.cpp
+++ b/flang/lib/Lower/ConvertExpr.cpp
@@ -88,12 +88,16 @@
 // from the "inline" FIR, e.g. it may diagnose out-of-memory conditions
 // during the temporary allocation whereas the inline implementation
 // relies on AllocMemOp that will silently return null in case
-// there is not enough memory. So it may be a good idea to set
-// this option to false for -O0.
+// there is not enough memory.
+//
+// If it is set to false, then TRANSPOSE will be lowered using
+// a runtime call. If it is set to true, then the lowering is controlled
+// by LoweringOptions::optimizeTranspose bit (see isTransposeOptEnabled
+// function in this file).
 static llvm::cl::opt<bool> optimizeTranspose(
     "opt-transpose",
     llvm::cl::desc("lower transpose without using a runtime call"),
-    llvm::cl::init(true));
+    llvm::cl::init(true), llvm::cl::Hidden);
 
 /// The various semantics of a program constituent (or a part thereof) as it
 /// may appear in an expression.
@@ -595,36 +599,50 @@
          module->name().ToString().find("omp_lib") == std::string::npos;
 }
 
+// Return true if TRANSPOSE should be lowered without a runtime call.
+static bool
+isTransposeOptEnabled(const Fortran::lower::AbstractConverter &converter) {
+  return optimizeTranspose &&
+         converter.getLoweringOptions().getOptimizeTranspose();
+}
+
 // A set of visitors to detect if the given expression
 // is a TRANSPOSE call that should be lowered without using
 // runtime TRANSPOSE implementation.
 template <typename T>
-static bool isOptimizableTranspose(const T &) {
+static bool isOptimizableTranspose(const T &,
+                                   const Fortran::lower::AbstractConverter &) {
   return false;
 }
 
 static bool
-isOptimizableTranspose(const Fortran::evaluate::ProcedureRef &procRef) {
+isOptimizableTranspose(const Fortran::evaluate::ProcedureRef &procRef,
+                       const Fortran::lower::AbstractConverter &converter) {
   const Fortran::evaluate::SpecificIntrinsic *intrin =
       procRef.proc().GetSpecificIntrinsic();
-  return optimizeTranspose && intrin && intrin->name == "transpose";
+  return isTransposeOptEnabled(converter) && intrin &&
+         intrin->name == "transpose";
 }
 
 template <typename T>
 static bool
-isOptimizableTranspose(const Fortran::evaluate::FunctionRef<T> &funcRef) {
+isOptimizableTranspose(const Fortran::evaluate::FunctionRef<T> &funcRef,
+                       const Fortran::lower::AbstractConverter &converter) {
   return isOptimizableTranspose(
-      static_cast<const Fortran::evaluate::ProcedureRef &>(funcRef));
+      static_cast<const Fortran::evaluate::ProcedureRef &>(funcRef), converter);
 }
 
 template <typename T>
-static bool isOptimizableTranspose(Fortran::evaluate::Expr<T> expr) {
+static bool
+isOptimizableTranspose(Fortran::evaluate::Expr<T> expr,
+                       const Fortran::lower::AbstractConverter &converter) {
   // If optimizeTranspose is not enabled, return false right away.
-  if (!optimizeTranspose)
+  if (!isTransposeOptEnabled(converter))
     return false;
 
-  return std::visit([&](const auto &e) { return isOptimizableTranspose(e); },
-                    expr.u);
+  return std::visit(
+      [&](const auto &e) { return isOptimizableTranspose(e, converter); },
+      expr.u);
 }
 
 namespace {
@@ -3289,7 +3307,7 @@
     // is used to not create a new temporary storage.
     if (isScalar(x) ||
         Fortran::evaluate::UnwrapWholeSymbolOrComponentDataRef(x) ||
-        (isTransformationalRef(x) && !isOptimizableTranspose(x)))
+        (isTransformationalRef(x) && !isOptimizableTranspose(x, converter)))
       return std::visit([&](const auto &e) { return genref(e); }, x.u);
     if (useBoxArg)
       return asArrayArg(x);
@@ -5139,7 +5157,7 @@
                              llvm::Optional<mlir::Type> retTy) {
     mlir::Location loc = getLoc();
 
-    if (isOptimizableTranspose(procRef))
+    if (isOptimizableTranspose(procRef, converter))
       return genTransposeProcRef(procRef);
 
     if (procRef.IsElemental()) {
diff --git a/flang/test/Lower/Intrinsics/transpose.f90 b/flang/test/Lower/Intrinsics/transpose.f90
--- a/flang/test/Lower/Intrinsics/transpose.f90
+++ b/flang/test/Lower/Intrinsics/transpose.f90
@@ -1,4 +1,5 @@
 ! RUN: bbc -emit-fir %s -opt-transpose=false -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -O0 %s -o - | FileCheck %s
 
 ! CHECK-LABEL: func @_QPtranspose_test(
 ! CHECK-SAME: %[[source:.*]]: !fir.ref<!fir.array<2x3xf32>>{{.*}}) {
diff --git a/flang/test/Lower/Intrinsics/transpose_opt.f90 b/flang/test/Lower/Intrinsics/transpose_opt.f90
--- a/flang/test/Lower/Intrinsics/transpose_opt.f90
+++ b/flang/test/Lower/Intrinsics/transpose_opt.f90
@@ -1,37 +1,31 @@
 ! RUN: bbc -emit-fir %s -opt-transpose=true -o - | FileCheck %s
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -O1 %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -O2 %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-fir -O3 %s -o - | FileCheck %s
 
 ! CHECK-LABEL: func.func @_QPtranspose_test(
 ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<2x3xf32>> {fir.bindc_name = "mat"}) {
 subroutine transpose_test(mat)
   real :: mat(2,3)
   call bar_transpose_test(transpose(mat))
-! CHECK: %[[VAL_1:.*]] = arith.constant 2 : index
-! CHECK: %[[VAL_2:.*]] = arith.constant 3 : index
-! CHECK: %[[VAL_3:.*]] = arith.constant 3 : index
-! CHECK: %[[VAL_4:.*]] = arith.constant 2 : index
-! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_1]], %[[VAL_2]] : (index, index) -> !fir.shape<2>
-! CHECK: %[[VAL_6:.*]] = fir.array_load %[[VAL_0]](%[[VAL_5]]) : (!fir.ref<!fir.array<2x3xf32>>, !fir.shape<2>) -> !fir.array<2x3xf32>
+! CHECK: %[[VAL_6:.*]] = fir.array_load %[[VAL_0]](%{{.*}}) : (!fir.ref<!fir.array<2x3xf32>>, !fir.shape<2>) -> !fir.array<2x3xf32>
 ! CHECK: %[[VAL_7:.*]] = fir.allocmem !fir.array<3x2xf32>
-! CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_3]], %[[VAL_4]] : (index, index) -> !fir.shape<2>
-! CHECK: %[[VAL_9:.*]] = fir.array_load %[[VAL_7]](%[[VAL_8]]) : (!fir.heap<!fir.array<3x2xf32>>, !fir.shape<2>) -> !fir.array<3x2xf32>
-! CHECK: %[[VAL_10:.*]] = arith.constant 1 : index
-! CHECK: %[[VAL_11:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_3]], %[[VAL_10]] : index
-! CHECK: %[[VAL_13:.*]] = arith.subi %[[VAL_4]], %[[VAL_10]] : index
-! CHECK: %[[VAL_14:.*]] = fir.do_loop %[[VAL_15:.*]] = %[[VAL_11]] to %[[VAL_13]] step %[[VAL_10]] unordered iter_args(%[[VAL_16:.*]] = %[[VAL_9]]) -> (!fir.array<3x2xf32>) {
-! CHECK: %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_10]] unordered iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (!fir.array<3x2xf32>) {
+! CHECK: %[[VAL_9:.*]] = fir.array_load %[[VAL_7]](%{{.*}}) : (!fir.heap<!fir.array<3x2xf32>>, !fir.shape<2>) -> !fir.array<3x2xf32>
+! CHECK: %[[VAL_14:.*]] = fir.do_loop %[[VAL_15:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_16:.*]] = %[[VAL_9]]) -> (!fir.array<3x2xf32>) {
+! CHECK: %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (!fir.array<3x2xf32>) {
 ! CHECK: %[[VAL_20:.*]] = fir.array_fetch %[[VAL_6]], %[[VAL_15]], %[[VAL_18]] : (!fir.array<2x3xf32>, index, index) -> f32
 ! CHECK: %[[VAL_21:.*]] = fir.array_update %[[VAL_19]], %[[VAL_20]], %[[VAL_18]], %[[VAL_15]] : (!fir.array<3x2xf32>, f32, index, index) -> !fir.array<3x2xf32>
 ! CHECK: fir.result %[[VAL_21]] : !fir.array<3x2xf32>
 ! CHECK: }
-! CHECK: fir.result %[[VAL_22:.*]] : !fir.array<3x2xf32>
+! CHECK: fir.result %[[VAL_17]] : !fir.array<3x2xf32>
 ! CHECK: }
-! CHECK: fir.array_merge_store %[[VAL_9]], %[[VAL_23:.*]] to %[[VAL_7]] : !fir.array<3x2xf32>, !fir.array<3x2xf32>, !fir.heap<!fir.array<3x2xf32>>
+! CHECK: fir.array_merge_store %[[VAL_9]], %[[VAL_14]] to %[[VAL_7]] : !fir.array<3x2xf32>, !fir.array<3x2xf32>, !fir.heap<!fir.array<3x2xf32>>
 ! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_7]] : (!fir.heap<!fir.array<3x2xf32>>) -> !fir.ref<!fir.array<3x2xf32>>
 ! CHECK: fir.call @_QPbar_transpose_test(%[[VAL_24]]) : (!fir.ref<!fir.array<3x2xf32>>) -> ()
 ! CHECK: fir.freemem %[[VAL_7]] : !fir.heap<!fir.array<3x2xf32>>
-! CHECK: return
-! CHECK: }
+
+! CHECK-NOT: @_FortranATranspose
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPtranspose_allocatable_test(
@@ -39,96 +33,66 @@
 subroutine transpose_allocatable_test(mat)
   real, allocatable :: mat(:,:)
   mat = transpose(mat)
 
+! Verify that the "optimized" TRANSPOSE loops are generated
+! three times in each branch checking the status of LHS allocatable.
+
 ! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
-! CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_2]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>, index) -> (index, index, index)
-! CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
-! CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>, index) -> (index, index, index)
 ! CHECK: %[[VAL_6:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>) -> !fir.heap<!fir.array<?x?xf32>>
-! CHECK: %[[VAL_7:.*]] = fir.shape_shift %[[VAL_3]]#0, %[[VAL_3]]#1, %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index, index, index) -> !fir.shapeshift<2>
-! CHECK: %[[VAL_8:.*]] = fir.array_load %[[VAL_6]](%[[VAL_7]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shapeshift<2>) -> !fir.array<?x?xf32>
+! CHECK: %[[VAL_8:.*]] = fir.array_load %[[VAL_6]](%{{.*}}) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shapeshift<2>) -> !fir.array<?x?xf32>
+
 ! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
 ! CHECK: %[[VAL_10:.*]] = fir.box_addr %[[VAL_9]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>) -> !fir.heap<!fir.array<?x?xf32>>
-! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (!fir.heap<!fir.array<?x?xf32>>) -> i64
-! CHECK: %[[VAL_12:.*]] = arith.constant 0 : i64
-! CHECK: %[[VAL_13:.*]] = arith.cmpi ne, %[[VAL_11]], %[[VAL_12]] : i64
-! CHECK: %[[VAL_14:.*]]:2 = fir.if %[[VAL_13]] -> (i1, !fir.heap<!fir.array<?x?xf32>>) {
-! CHECK: %[[VAL_15:.*]] = arith.constant false
-! CHECK: %[[VAL_16:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_9]], %[[VAL_16]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>, index) -> (index, index, index)
-! CHECK: %[[VAL_18:.*]] = arith.constant 1 : index
-! CHECK: %[[VAL_19:.*]]:3 = fir.box_dims %[[VAL_9]], %[[VAL_18]] : (!fir.box<!fir.heap<!fir.array<?x?xf32>>>, index) -> (index, index, index)
-! CHECK: %[[VAL_20:.*]] = arith.cmpi ne, %[[VAL_17]]#1, %[[VAL_5]]#1 : index
-! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_20]], %[[VAL_15]] : i1
-! CHECK: %[[VAL_22:.*]] = arith.cmpi ne, %[[VAL_19]]#1, %[[VAL_3]]#1 : index
-! CHECK: %[[VAL_23:.*]] = arith.select %[[VAL_22]], %[[VAL_22]], %[[VAL_21]] : i1
-! CHECK: %[[VAL_24:.*]] = fir.if %[[VAL_23]] -> (!fir.heap<!fir.array<?x?xf32>>) {
-! CHECK: %[[VAL_25:.*]] = fir.allocmem !fir.array<?x?xf32>, %[[VAL_5]]#1, %[[VAL_3]]#1 {uniq_name = ".auto.alloc"}
-! CHECK: %[[VAL_26:.*]] = fir.shape %[[VAL_5]]#1, %[[VAL_3]]#1 : (index, index) -> !fir.shape<2>
-! CHECK: %[[VAL_27:.*]] = fir.array_load %[[VAL_25]](%[[VAL_26]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
-! CHECK: %[[VAL_28:.*]] = arith.constant 1 : index
-! CHECK: %[[VAL_29:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_30:.*]] = arith.subi %[[VAL_5]]#1, %[[VAL_28]] : index
-! CHECK: %[[VAL_31:.*]] = arith.subi %[[VAL_3]]#1, %[[VAL_28]] : index
-! CHECK: %[[VAL_32:.*]] = fir.do_loop %[[VAL_33:.*]] = %[[VAL_29]] to %[[VAL_31]] step %[[VAL_28]] unordered iter_args(%[[VAL_34:.*]] = %[[VAL_27]]) -> (!fir.array<?x?xf32>) {
-! CHECK: %[[VAL_35:.*]] = fir.do_loop %[[VAL_36:.*]] = %[[VAL_29]] to %[[VAL_30]] step %[[VAL_28]] unordered iter_args(%[[VAL_37:.*]] = %[[VAL_34]]) -> (!fir.array<?x?xf32>) {
+
+! CHECK: %[[VAL_14:.*]]:2 = fir.if %{{.*}} -> (i1, !fir.heap<!fir.array<?x?xf32>>) {
+
+! CHECK: %[[VAL_24:.*]] = fir.if %{{.*}} -> (!fir.heap<!fir.array<?x?xf32>>) {
+
+! CHECK: %[[VAL_25:.*]] = fir.allocmem !fir.array<?x?xf32>
+! CHECK: %[[VAL_27:.*]] = fir.array_load %[[VAL_25]](%{{.*}}) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
+
+! CHECK: %[[VAL_32:.*]] = fir.do_loop %[[VAL_33:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_34:.*]] = %[[VAL_27]]) -> (!fir.array<?x?xf32>) {
+! CHECK: %[[VAL_35:.*]] = fir.do_loop %[[VAL_36:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_37:.*]] = %[[VAL_34]]) -> (!fir.array<?x?xf32>) {
 ! CHECK: %[[VAL_38:.*]] = fir.array_fetch %[[VAL_8]], %[[VAL_33]], %[[VAL_36]] : (!fir.array<?x?xf32>, index, index) -> f32
 ! CHECK: %[[VAL_39:.*]] = fir.array_update %[[VAL_37]], %[[VAL_38]], %[[VAL_36]], %[[VAL_33]] : (!fir.array<?x?xf32>, f32, index, index) -> !fir.array<?x?xf32>
 ! CHECK: fir.result %[[VAL_39]] : !fir.array<?x?xf32>
 ! CHECK: }
-! CHECK: fir.result %[[VAL_40:.*]] : !fir.array<?x?xf32>
+! CHECK: fir.result %[[VAL_35]] : !fir.array<?x?xf32>
 ! CHECK: }
-! CHECK: fir.array_merge_store %[[VAL_27]], %[[VAL_41:.*]] to %[[VAL_25]] : !fir.array<?x?xf32>, !fir.array<?x?xf32>, !fir.heap<!fir.array<?x?xf32>>
-! CHECK: fir.result %[[VAL_25]] : !fir.heap<!fir.array<?x?xf32>>
+! CHECK: fir.array_merge_store %[[VAL_27]], %[[VAL_32]] to %[[VAL_25]] : !fir.array<?x?xf32>, !fir.array<?x?xf32>, !fir.heap<!fir.array<?x?xf32>>
+
 ! CHECK: } else {
-! CHECK: %[[VAL_42:.*]] = fir.shape %[[VAL_5]]#1, %[[VAL_3]]#1 : (index, index) -> !fir.shape<2>
-! CHECK: %[[VAL_43:.*]] = fir.array_load %[[VAL_10]](%[[VAL_42]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
-! CHECK: %[[VAL_44:.*]] = arith.constant 1 : index
-! CHECK: %[[VAL_45:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_46:.*]] = arith.subi %[[VAL_5]]#1, %[[VAL_44]] : index
-! CHECK: %[[VAL_47:.*]] = arith.subi %[[VAL_3]]#1, %[[VAL_44]] : index
-! CHECK: %[[VAL_48:.*]] = fir.do_loop %[[VAL_49:.*]] = %[[VAL_45]] to %[[VAL_47]] step %[[VAL_44]] unordered iter_args(%[[VAL_50:.*]] = %[[VAL_43]]) -> (!fir.array<?x?xf32>) {
-! CHECK: %[[VAL_51:.*]] = fir.do_loop %[[VAL_52:.*]] = %[[VAL_45]] to %[[VAL_46]] step %[[VAL_44]] unordered iter_args(%[[VAL_53:.*]] = %[[VAL_50]]) -> (!fir.array<?x?xf32>) {
+
+! CHECK: %[[VAL_43:.*]] = fir.array_load %[[VAL_10]](%{{.*}}) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
+
+! CHECK: %[[VAL_48:.*]] = fir.do_loop %[[VAL_49:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_50:.*]] = %[[VAL_43]]) -> (!fir.array<?x?xf32>) {
+! CHECK: %[[VAL_51:.*]] = fir.do_loop %[[VAL_52:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_53:.*]] = %[[VAL_50]]) -> (!fir.array<?x?xf32>) {
 ! CHECK: %[[VAL_54:.*]] = fir.array_fetch %[[VAL_8]], %[[VAL_49]], %[[VAL_52]] : (!fir.array<?x?xf32>, index, index) -> f32
 ! CHECK: %[[VAL_55:.*]] = fir.array_update %[[VAL_53]], %[[VAL_54]], %[[VAL_52]], %[[VAL_49]] : (!fir.array<?x?xf32>, f32, index, index) -> !fir.array<?x?xf32>
 ! CHECK: fir.result %[[VAL_55]] : !fir.array<?x?xf32>
 ! CHECK: }
-! CHECK: fir.result %[[VAL_56:.*]] : !fir.array<?x?xf32>
+! CHECK: fir.result %[[VAL_51]] : !fir.array<?x?xf32>
 ! CHECK: }
-! CHECK: fir.array_merge_store %[[VAL_43]], %[[VAL_57:.*]] to %[[VAL_10]] : !fir.array<?x?xf32>, !fir.array<?x?xf32>, !fir.heap<!fir.array<?x?xf32>>
+! CHECK: fir.array_merge_store %[[VAL_43]], %[[VAL_48]] to %[[VAL_10]] : !fir.array<?x?xf32>, !fir.array<?x?xf32>, !fir.heap<!fir.array<?x?xf32>>
+
 ! CHECK: fir.result %[[VAL_10]] : !fir.heap<!fir.array<?x?xf32>>
 ! CHECK: }
-! CHECK: fir.result %[[VAL_23]], %[[VAL_58:.*]] : i1, !fir.heap<!fir.array<?x?xf32>>
+
 ! CHECK: } else {
-! CHECK: %[[VAL_59:.*]] = arith.constant true
-! CHECK: %[[VAL_60:.*]] = fir.allocmem !fir.array<?x?xf32>, %[[VAL_5]]#1, %[[VAL_3]]#1 {uniq_name = ".auto.alloc"}
-! CHECK: %[[VAL_61:.*]] = fir.shape %[[VAL_5]]#1, %[[VAL_3]]#1 : (index, index) -> !fir.shape<2>
-! CHECK: %[[VAL_62:.*]] = fir.array_load %[[VAL_60]](%[[VAL_61]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
-! CHECK: %[[VAL_63:.*]] = arith.constant 1 : index
-! CHECK: %[[VAL_64:.*]] = arith.constant 0 : index
-! CHECK: %[[VAL_65:.*]] = arith.subi %[[VAL_5]]#1, %[[VAL_63]] : index
-! CHECK: %[[VAL_66:.*]] = arith.subi %[[VAL_3]]#1, %[[VAL_63]] : index
-! CHECK: %[[VAL_67:.*]] = fir.do_loop %[[VAL_68:.*]] = %[[VAL_64]] to %[[VAL_66]] step %[[VAL_63]] unordered iter_args(%[[VAL_69:.*]] = %[[VAL_62]]) -> (!fir.array<?x?xf32>) {
-! CHECK: %[[VAL_70:.*]] = fir.do_loop %[[VAL_71:.*]] = %[[VAL_64]] to %[[VAL_65]] step %[[VAL_63]] unordered iter_args(%[[VAL_72:.*]] = %[[VAL_69]]) -> (!fir.array<?x?xf32>) {
+
+! CHECK: %[[VAL_60:.*]] = fir.allocmem !fir.array<?x?xf32>
+! CHECK: %[[VAL_62:.*]] = fir.array_load %[[VAL_60]](%{{.*}}) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
+
+! CHECK: %[[VAL_67:.*]] = fir.do_loop %[[VAL_68:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_69:.*]] = %[[VAL_62]]) -> (!fir.array<?x?xf32>) {
+! CHECK: %[[VAL_70:.*]] = fir.do_loop %[[VAL_71:.*]] = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%[[VAL_72:.*]] = %[[VAL_69]]) -> (!fir.array<?x?xf32>) {
 ! CHECK: %[[VAL_73:.*]] = fir.array_fetch %[[VAL_8]], %[[VAL_68]], %[[VAL_71]] : (!fir.array<?x?xf32>, index, index) -> f32
 ! CHECK: %[[VAL_74:.*]] = fir.array_update %[[VAL_72]], %[[VAL_73]], %[[VAL_71]], %[[VAL_68]] : (!fir.array<?x?xf32>, f32, index, index) -> !fir.array<?x?xf32>
 ! CHECK: fir.result %[[VAL_74]] : !fir.array<?x?xf32>
 ! CHECK: }
-! CHECK: fir.result %[[VAL_75:.*]] : !fir.array<?x?xf32>
+! CHECK: fir.result %[[VAL_70]] : !fir.array<?x?xf32>
 ! CHECK: }
-! CHECK: fir.array_merge_store %[[VAL_62]], %[[VAL_76:.*]] to %[[VAL_60]] : !fir.array<?x?xf32>, !fir.array<?x?xf32>, !fir.heap<!fir.array<?x?xf32>>
-! CHECK: fir.result %[[VAL_59]], %[[VAL_60]] : i1, !fir.heap<!fir.array<?x?xf32>>
-! CHECK: }
-! CHECK: fir.if %[[VAL_77:.*]]#0 {
-! CHECK: fir.if %[[VAL_13]] {
-! CHECK: fir.freemem %[[VAL_10]] : !fir.heap<!fir.array<?x?xf32>>
-! CHECK: }
-! CHECK: %[[VAL_78:.*]] = fir.shape %[[VAL_5]]#1, %[[VAL_3]]#1 : (index, index) -> !fir.shape<2>
-! CHECK: %[[VAL_79:.*]] = fir.embox %[[VAL_77]]#1(%[[VAL_78]]) : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.box<!fir.heap<!fir.array<?x?xf32>>>
-! CHECK: fir.store %[[VAL_79]] to %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>>
+! CHECK: fir.array_merge_store %[[VAL_62]], %[[VAL_67]] to %[[VAL_60]] : !fir.array<?x?xf32>, !fir.array<?x?xf32>, !fir.heap<!fir.array<?x?xf32>>
+
 ! CHECK: }
-! CHECK: return
-! CHECK: }
-end subroutine
-! CHECK: func.func private @_QPbar_transpose_test(!fir.ref<!fir.array<3x2xf32>>)
+! CHECK-NOT: @_FortranATranspose
+end subroutine
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -215,10 +215,12 @@
   auto &defKinds = semanticsContext.defaultKinds();
   fir::KindMapping kindMap(
      &ctx, llvm::ArrayRef<fir::KindTy>{fir::fromDefaultKinds(defKinds)});
+  // Use default lowering options for bbc.
+  Fortran::frontend::LoweringOptions loweringOptions{};
   auto burnside = Fortran::lower::LoweringBridge::create(
      ctx, defKinds, semanticsContext.intrinsics(),
      semanticsContext.targetCharacteristics(), parsing.allCooked(), "",
-      kindMap);
+      kindMap, loweringOptions);
   burnside.lower(parseTree, semanticsContext);
   mlir::ModuleOp mlirModule = burnside.getModule();
   std::error_code ec;
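Note for readers trying the new option outside this patch: the sketch below is illustrative only and is not part of the diff. It shows how a driver-like caller could derive LoweringOptions from an optimization level before passing it as the new trailing argument of LoweringBridge::create, mirroring setLoweringOptions() in CompilerInvocation.cpp and the default-constructed options in bbc.cpp above. The helper name makeLoweringOptions and its optLevel parameter are hypothetical; only LoweringOptions and setOptimizeTranspose come from the change.

// Illustrative sketch, not part of the patch. makeLoweringOptions is a
// hypothetical helper; LoweringOptions/setOptimizeTranspose are from the
// new flang/Frontend/LoweringOptions.h added above.
#include "flang/Frontend/LoweringOptions.h"

static Fortran::frontend::LoweringOptions makeLoweringOptions(unsigned optLevel) {
  // Default-constructed options keep optimizeTranspose enabled, which is
  // why bbc gets the inline TRANSPOSE lowering without any extra flags.
  Fortran::frontend::LoweringOptions opts{};
  // Match setLoweringOptions() in CompilerInvocation.cpp: keep the runtime
  // TRANSPOSE call at -O0 (better out-of-memory diagnostics), lower it
  // inline at -O1 and above.
  opts.setOptimizeTranspose(optLevel > 0);
  return opts;
}

The resulting object would then be threaded into lowering the same way FrontendActions.cpp does with ci.getInvocation().getLoweringOptions().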