Index: flang/runtime/extrema.cpp
===================================================================
--- flang/runtime/extrema.cpp
+++ flang/runtime/extrema.cpp
@@ -7,15 +7,20 @@
 //===----------------------------------------------------------------------===//
 
 // Implements MAXLOC, MINLOC, MAXVAL, & MINVAL for all required operand types
-// and shapes and (for MAXLOC & MINLOC) result integer kinds.
+// and shapes and (for MAXLOC & MINLOC) result integer kinds.  Also implements
+// NORM2 using common infrastructure.
 
 #include "character.h"
 #include "reduction-templates.h"
 #include "reduction.h"
 #include "flang/Common/long-double.h"
+#include <algorithm>
 #include <cinttypes>
+#include <cmath>
+#include <optional>
 
 namespace Fortran::runtime {
+
 // MAXLOC & MINLOC
 
 template <typename T, bool IS_MAX, bool BACK> struct NumericCompare {
@@ -332,11 +337,11 @@
       NumericExtremumAccumulator<CAT, KIND, IS_MAXVAL>{x}, intrinsic);
 }
 
-template <TypeCategory CAT, int KIND, bool IS_MAXVAL,
-    template <TypeCategory, int, bool> class ACCUMULATOR>
-static void DoMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
+template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
+static void DoMaxMinNorm2(Descriptor &result, const Descriptor &x, int dim,
     const Descriptor *mask, const char *intrinsic, Terminator &terminator) {
   using Type = CppTypeFor<CAT, KIND>;
+  ACCUMULATOR accumulator{x};
   if (dim == 0 || x.rank() == 1) {
     // Total reduction
     result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
@@ -345,14 +350,11 @@
       terminator.Crash(
           "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
     }
-    ACCUMULATOR<CAT, KIND, IS_MAXVAL> accumulator{x};
     DoTotalReduction<Type>(x, dim, mask, accumulator, intrinsic, terminator);
     accumulator.GetResult(result.OffsetElement<Type>());
   } else {
     // Partial reduction
-    using Accumulator = ACCUMULATOR<CAT, KIND, IS_MAXVAL>;
-    Accumulator accumulator{x};
-    PartialReduction<Accumulator, CAT, KIND>(
+    PartialReduction<ACCUMULATOR, CAT, KIND>(
         result, x, dim, mask, terminator, intrinsic, accumulator);
   }
 }
@@ -362,7 +364,8 @@
     void operator()(Descriptor &result, const Descriptor &x, int dim,
         const Descriptor *mask, const char *intrinsic,
         Terminator &terminator) const {
-      DoMaxOrMin<CAT, KIND, IS_MAXVAL, NumericExtremumAccumulator>(
+      DoMaxMinNorm2<CAT, KIND,
+          NumericExtremumAccumulator<CAT, KIND, IS_MAXVAL>>(
           result, x, dim, mask, intrinsic, terminator);
     }
   };
@@ -392,8 +395,7 @@
   }
 }
 
-template <TypeCategory, int KIND, bool IS_MAXVAL>
-class CharacterExtremumAccumulator {
+template <int KIND, bool IS_MAXVAL> class CharacterExtremumAccumulator {
 public:
   using Type = CppTypeFor<TypeCategory::Character, KIND>;
   explicit CharacterExtremumAccumulator(const Descriptor &array)
@@ -434,8 +436,8 @@
     void operator()(Descriptor &result, const Descriptor &x, int dim,
         const Descriptor *mask, const char *intrinsic,
         Terminator &terminator) const {
-      DoMaxOrMin<TypeCategory::Character, KIND, IS_MAXVAL,
-          CharacterExtremumAccumulator>(
+      DoMaxMinNorm2<TypeCategory::Character, KIND,
+          CharacterExtremumAccumulator<KIND, IS_MAXVAL>>(
           result, x, dim, mask, intrinsic, terminator);
     }
   };
@@ -589,4 +591,91 @@
   }
 }
 } // extern "C"
+
+// NORM2
+
+template <int KIND> class Norm2Accumulator {
+public:
+  using Type = CppTypeFor<TypeCategory::Real, KIND>;
+  // Use at least double precision for accumulators
+  using AccumType = CppTypeFor<TypeCategory::Real, std::max(KIND, 8)>;
+  explicit Norm2Accumulator(const Descriptor &array) : array_{array} {}
+  void Reinitialize() { max_ = sum_ = 0; }
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    // m * sqrt(1 + sum((others(:)/m)**2))
+    *p = static_cast<Type>(max_ * std::sqrt(1 + sum_));
+  }
+  bool Accumulate(Type x) {
+    auto absX{AccumType{std::abs(x)}};
+    if (!max_) {
+      max_ = x;
+    } else if (absX > max_) {
+      auto t{max_ / absX}; // < 1.0
+      auto tsq{t * t};
+      sum_ *= tsq; // scale sum to reflect change to the max
+      sum_ += tsq; // include a term for the previous max
+      max_ = absX;
+    } else { // absX <= max_
+      auto t{absX / max_};
+      sum_ += t * t;
+    }
+    return true;
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    return Accumulate(*array_.Element<A>(at));
+  }
+
+private:
+  const Descriptor &array_;
+  AccumType max_{0}; // value (m) with largest magnitude
+  AccumType sum_{0}; // sum((others(:)/m)**2)
+};
+
+template <int KIND> struct Norm2Helper {
+  void operator()(Descriptor &result, const Descriptor &x, int dim,
+      const Descriptor *mask, Terminator &terminator) const {
+    DoMaxMinNorm2<TypeCategory::Real, KIND, Norm2Accumulator<KIND>>(
+        result, x, dim, mask, "NORM2", terminator);
+  }
+};
+
+extern "C" {
+// TODO: REAL(2 & 3)
+CppTypeFor<TypeCategory::Real, 4> RTNAME(Norm2_4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 4>(
+      x, source, line, dim, mask, Norm2Accumulator<4>{x}, "NORM2");
+}
+CppTypeFor<TypeCategory::Real, 8> RTNAME(Norm2_8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 8>(
+      x, source, line, dim, mask, Norm2Accumulator<8>{x}, "NORM2");
+}
+#if LONG_DOUBLE == 80
+CppTypeFor<TypeCategory::Real, 10> RTNAME(Norm2_10)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 10>(
+      x, source, line, dim, mask, Norm2Accumulator<10>{x}, "NORM2");
+}
+#elif LONG_DOUBLE == 128
+CppTypeFor<TypeCategory::Real, 16> RTNAME(Norm2_16)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 16>(
+      x, source, line, dim, mask, Norm2Accumulator<16>{x}, "NORM2");
+}
+#endif
+
+void RTNAME(Norm2Dim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask) {
+  Terminator terminator{source, line};
+  auto type{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, type);
+  if (type->first == TypeCategory::Real) {
+    ApplyFloatingPointKind<Norm2Helper, void>(
+        type->second, terminator, result, x, dim, mask, terminator);
+  } else {
+    terminator.Crash("NORM2: bad type code %d", x.type().raw());
+  }
+}
+} // extern "C"
 } // namespace Fortran::runtime
Index: flang/runtime/reduction.h
===================================================================
--- flang/runtime/reduction.h
+++ flang/runtime/reduction.h
@@ -243,6 +243,22 @@
 void RTNAME(MinvalDim)(Descriptor &, const Descriptor &, int dim,
     const char *source, int line, const Descriptor *mask = nullptr);
 
+// NORM2
+float RTNAME(Norm2_2)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(Norm2_3)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(Norm2_4)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+double RTNAME(Norm2_8)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(Norm2_10)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(Norm2_16)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+void RTNAME(Norm2Dim)(Descriptor &, const Descriptor &, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr);
+
 // ALL, ANY, COUNT, & PARITY logical reductions
 bool RTNAME(All)(const Descriptor &, const char *source, int line, int dim = 0);
 void RTNAME(AllDim)(Descriptor &result, const Descriptor &, int dim,
Index: flang/runtime/reduction.cpp
===================================================================
--- flang/runtime/reduction.cpp
+++ flang/runtime/reduction.cpp
@@ -10,7 +10,7 @@
 // types and shapes.
 //
 // FINDLOC, SUM, and PRODUCT are in their own eponymous source files;
-// MAXLOC, MINLOC, MAXVAL, and MINVAL are in extrema.cpp.
+// NORM2, MAXLOC, MINLOC, MAXVAL, and MINVAL are in extrema.cpp.
 
 #include "reduction.h"
 #include "reduction-templates.h"
Index: flang/unittests/RuntimeGTest/Reduction.cpp
===================================================================
--- flang/unittests/RuntimeGTest/Reduction.cpp
+++ flang/unittests/RuntimeGTest/Reduction.cpp
@@ -47,17 +47,25 @@
   prod.Destroy();
 }
 
-TEST(Reductions, DoubleMaxMin) {
+TEST(Reductions, DoubleMaxMinNorm2) {
   std::vector<int> shape{3, 4, 2}; // rows, columns, planes
   //   0  -3   6  -9     12 -15  18 -21
   //  -1   4  -7  10    -13  16 -19  22
   //   2  -5   8 -11     14 -17  20  22   <- note last two are equal to test
   //   BACK=
-  auto array{MakeArray<TypeCategory::Real, 8>(shape,
-      std::vector<double>{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13,
-          14, -15, 16, -17, 18, -19, 20, -21, 22, 22})};
+  std::vector<double> rawData{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12,
+      -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, 22};
+  auto array{MakeArray<TypeCategory::Real, 8>(shape, rawData)};
   EXPECT_EQ(RTNAME(MaxvalReal8)(*array, __FILE__, __LINE__), 22.0);
   EXPECT_EQ(RTNAME(MinvalReal8)(*array, __FILE__, __LINE__), -21.0);
+  double naiveNorm2{0};
+  for (auto x : rawData) {
+    naiveNorm2 += x * x;
+  }
+  naiveNorm2 = std::sqrt(naiveNorm2);
+  double norm2Error{
+      std::abs(naiveNorm2 - RTNAME(Norm2_8)(*array, __FILE__, __LINE__))};
+  EXPECT_LE(norm2Error, 0.000001 * naiveNorm2);
   StaticDescriptor<2> statDesc;
   Descriptor &loc{statDesc.descriptor()};
   RTNAME(Maxloc)