diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -55,6 +55,7 @@
 std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
 std::unique_ptr<mlir::Pass> createMemoryAllocationPass();
 std::unique_ptr<mlir::Pass> createSimplifyIntrinsicsPass();
+std::unique_ptr<mlir::Pass> createLoopVersioningPass();
 
 std::unique_ptr<mlir::Pass>
 createMemoryAllocationPass(bool dynOnHeap, std::size_t maxStackSize);
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -237,4 +237,15 @@
   let constructor = "::fir::createAlgebraicSimplificationPass()";
 }
 
+def LoopVersioning : Pass<"loop-versioning", "mlir::func::FuncOp"> {
+  let summary = "Loop Versioning";
+  let description = [{
+    Loop Versioning pass adds a check and two variants of a loop when the input
+    array is an assumed shape array, to optimise for the (often common) case
+    where an array has element sized stride. A fixed stride allows for the loop
+    to be vectorized as well as other loop optimisations.
+  }];
+  let constructor = "::fir::createLoopVersioningPass()";
+}
+
 #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -156,7 +156,7 @@
 ///
 /// \param pm - MLIR pass manager that will hold the pipeline definition
 inline void createDefaultFIROptimizerPassPipeline(
-    mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) {
+	mlir::PassManager &pm, bool loopVersioning, llvm::OptimizationLevel optLevel = defaultOptLevel) {
   // simplify the IR
   mlir::GreedyRewriteConfig config;
   config.enableRegionSimplification = false;
@@ -170,6 +170,12 @@
     pm.addPass(fir::createSimplifyIntrinsicsPass());
     pm.addPass(fir::createAlgebraicSimplificationPass(config));
   }
+
+  // Loop versioning is opt-in. It is added ahead of CSE and of the memory
+  // allocation optimization.
+  if (loopVersioning)
+    pm.addPass(fir::createLoopVersioningPass());
+
   pm.addPass(mlir::createCSEPass());
   fir::addMemoryAllocationOpt(pm);
 
@@ -209,9 +215,9 @@
 /// \param optLevel - optimization level used for creating FIR optimization
 ///   passes pipeline
 inline void createMLIRToLLVMPassPipeline(
-    mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) {
+	mlir::PassManager &pm, bool loopVersioning, llvm::OptimizationLevel optLevel = defaultOptLevel) {
   // Add default optimizer pass pipeline.
-  fir::createDefaultFIROptimizerPassPipeline(pm, optLevel);
+  fir::createDefaultFIROptimizerPassPipeline(pm, loopVersioning, optLevel);
 
   // Add codegen pass pipeline.
   fir::createDefaultFIRCodeGenPassPipeline(pm);
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -131,7 +131,6 @@
     opts.LoopVersioning = 1;
   }
 
-
   for (auto *a : args.filtered(clang::driver::options::OPT_fpass_plugin_EQ))
     opts.LLVMPassPlugins.push_back(a->getValue());
 
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -12,6 +12,7 @@
   SimplifyRegionLite.cpp
   AlgebraicSimplification.cpp
   SimplifyIntrinsics.cpp
+  LoopVersioning.cpp
 
   DEPENDS
   FIRBuilder
diff --git a/flang/lib/Optimizer/Transforms/LoopVersioning.cpp b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
new file mode 100644
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
@@ -0,0 +1,155 @@
+//===- LoopVersioning.cpp -- improve loop performance by duplicating certain
+// loops -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// \file
+/// This pass looks for loops iterating over assumed shape arrays, that can
+/// be optimized by "guessing" that the stride is element-sized.
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Support/FIRContext.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "flang/Runtime/entry-names.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "mlir/Transforms/RegionUtils.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+
+namespace fir {
+#define GEN_PASS_DEF_LOOPVERSIONING
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+#define DEBUG_TYPE "flang-loop-versioning"
+
+namespace {
+/// Function pass that duplicates fir.do_loop operations using boxed array
+/// arguments; all of the work happens in runOnOperation().
+class LoopVersioningPass
+    : public fir::impl::LoopVersioningBase<LoopVersioningPass> {
+public:
+  void runOnOperation() override;
+};
+
+} // namespace
+
+/// Versions every fir.do_loop that addresses a rank-1 boxed array argument: a
+/// fir.if runs a copy working on the raw array when the byte stride equals the
+/// element size, and the original loop otherwise.
+void LoopVersioningPass::runOnOperation() {
+  LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n");
+  auto func = getOperation();
+  LLVM_DEBUG(llvm::dbgs() << "Func-name:" << func.getSymName() << "\n");
+
+  // Collect boxed rank-1 arrays of run-time extent. Higher ranks are skipped
+  // since one stride does not prove contiguity; only integer and floating
+  // point elements are kept since their size is derived from the bit width.
+  mlir::SmallVector<mlir::Value> argsOfInterest;
+  for (mlir::BlockArgument arg : func.getArguments()) {
+    auto boxTy = arg.getType().dyn_cast<fir::BoxType>();
+    if (!boxTy)
+      continue;
+    auto seqTy = boxTy.getEleTy().dyn_cast<fir::SequenceType>();
+    if (seqTy && seqTy.hasDynamicExtents() && seqTy.getDimension() == 1 &&
+        seqTy.getEleTy().isIntOrFloat())
+      argsOfInterest.push_back(arg);
+  }
+  if (argsOfInterest.empty())
+    return;
+
+  // Pair each loop with the first such argument that a fir.coordinate_of in
+  // its body uses as base. TODO: version once per nest; handle more arguments.
+  struct Candidate {
+    fir::DoLoopOp loop;
+    mlir::Value box;
+  };
+  mlir::SmallVector<Candidate> candidates;
+  func.walk([&](fir::DoLoopOp loop) {
+    mlir::Value box;
+    loop.walk([&](fir::CoordinateOp coord) {
+      if (std::find(argsOfInterest.begin(), argsOfInterest.end(),
+                    coord.getRef()) == argsOfInterest.end())
+        return mlir::WalkResult::advance();
+      box = coord.getRef();
+      return mlir::WalkResult::interrupt();
+    });
+    if (box)
+      candidates.push_back(Candidate{loop, box});
+  });
+  if (candidates.empty())
+    return;
+
+  mlir::ModuleOp module = func->getParentOfType<mlir::ModuleOp>();
+  fir::KindMapping kindMap = fir::getKindMapping(module);
+  fir::FirOpBuilder builder{module, kindMap};
+  mlir::IndexType idxTy = builder.getIndexType();
+  LLVM_DEBUG(llvm::dbgs() << "opsOfInterest: " << candidates.size() << "\n");
+  for (const Candidate &candidate : candidates) {
+    mlir::Operation *op = candidate.loop;
+    mlir::Value box = candidate.box;
+    mlir::Location loc = op->getLoc();
+    LLVM_DEBUG(op->dump());
+    builder.setInsertionPoint(op);
+    // Dimension 0 is contiguous when its byte stride is one element wide.
+    mlir::Type elementType = fir::unwrapSeqOrBoxedSeqType(box.getType());
+    mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, 0);
+    auto dims = builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box,
+                                               dimIdx);
+    mlir::Value elemSize = builder.createIntegerConstant(
+        loc, idxTy, elementType.getIntOrFloatBitWidth() / 8);
+    mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
+        loc, mlir::arith::CmpIPredicate::eq, dims.getResult(2), elemSize);
+    // Both branches have to yield the results of the loop, if it has any.
+    auto ifOp = builder.create<fir::IfOp>(loc, op->getResultTypes(), cmp,
+                                          /*withElse=*/true);
+
+    // Then: a copy of the loop that addresses the raw array directly.
+    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+    fir::SequenceType::Shape flatShape = {
+        fir::SequenceType::getUnknownExtent()};
+    mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
+    mlir::Type boxArrTy = fir::BoxType::get(arrTy);
+    mlir::Type refArrTy = builder.getRefType(arrTy);
+    auto carg = builder.create<fir::ConvertOp>(loc, boxArrTy, box);
+    auto caddr = builder.create<fir::BoxAddrOp>(loc, refArrTy, carg);
+    mlir::Operation *clonedLoop = builder.insert(op->clone());
+    clonedLoop->walk([&](fir::CoordinateOp coord) {
+      coord->replaceUsesOfWith(box, caddr);
+    });
+    if (clonedLoop->getNumResults() > 0)
+      builder.create<fir::ResultOp>(loc, clonedLoop->getResults());
+
+    // Else: the original loop, so that it no longer runs unconditionally.
+    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+    op->replaceAllUsesWith(ifOp.getOperation());
+    op->remove();
+    builder.insert(op);
+    if (op->getNumResults() > 0)
+      builder.create<fir::ResultOp>(loc, op->getResults());
+  }
+  LLVM_DEBUG(module->dump());
+  LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
+}
+
+std::unique_ptr<mlir::Pass> fir::createLoopVersioningPass() {
+  return std::make_unique<LoopVersioningPass>(); // Passes.td constructor
+}
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -273,7 +273,8 @@
     pm.addPass(std::make_unique<Fortran::lower::VerifierPass>());
 
     // Add O2 optimizer pass pipeline.
-    fir::createDefaultFIROptimizerPassPipeline(pm, false, llvm::OptimizationLevel::O2);
+    fir::createDefaultFIROptimizerPassPipeline(pm, false,
+                                               llvm::OptimizationLevel::O2);
   }
 
   if (mlir::succeeded(pm.run(mlirModule))) {
diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp
--- a/flang/tools/tco/tco.cpp
+++ b/flang/tools/tco/tco.cpp
@@ -122,7 +122,7 @@
       fir::createDefaultFIRCodeGenPassPipeline(pm);
     } else {
       // Run tco with O2 by default.
-	    fir::createMLIRToLLVMPassPipeline(pm, false, llvm::OptimizationLevel::O2);
+      fir::createMLIRToLLVMPassPipeline(pm, false, llvm::OptimizationLevel::O2);
     }
     fir::addLLVMDialectToLLVMPass(pm, out.os());
   }