diff --git a/flang/include/flang/Common/long-double.h b/flang/include/flang/Common/long-double.h
new file mode 100644
--- /dev/null
+++ b/flang/include/flang/Common/long-double.h
@@ -0,0 +1,23 @@
+/*===-- include/flang/Common/config.h -------------------------------*- C -*-===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+/* This header can be used by both C and C++. */
+
+#ifndef FORTRAN_COMMON_LONG_DOUBLE_H
+#define FORTRAN_COMMON_LONG_DOUBLE_H
+
+#ifdef _MSC_VER /* no long double */
+#undef LONG_DOUBLE
+#elif __x86_64__ /* x87 extended precision */
+#define LONG_DOUBLE 80
+#else
+#define LONG_DOUBLE 128
+#endif
+
+#endif /* FORTRAN_COMMON_LONG_DOUBLE_H */
diff --git a/flang/include/flang/Common/uint128.h b/flang/include/flang/Common/uint128.h
--- a/flang/include/flang/Common/uint128.h
+++ b/flang/include/flang/Common/uint128.h
@@ -25,31 +25,31 @@
 
 namespace Fortran::common {
 
-class UnsignedInt128 {
+template <bool IS_SIGNED = false> class Int128 {
 public:
-  constexpr UnsignedInt128() {}
+  constexpr Int128() {}
   // This means of definition provides some portability for
   // "size_t" operands.
-  constexpr UnsignedInt128(unsigned n) : low_{n} {}
-  constexpr UnsignedInt128(unsigned long n) : low_{n} {}
-  constexpr UnsignedInt128(unsigned long long n) : low_{n} {}
-  constexpr UnsignedInt128(int n)
+  constexpr Int128(unsigned n) : low_{n} {}
+  constexpr Int128(unsigned long n) : low_{n} {}
+  constexpr Int128(unsigned long long n) : low_{n} {}
+  constexpr Int128(int n)
       : low_{static_cast<std::uint64_t>(n)}, high_{-static_cast<std::uint64_t>(
                                                  n < 0)} {}
-  constexpr UnsignedInt128(long n)
+  constexpr Int128(long n)
       : low_{static_cast<std::uint64_t>(n)}, high_{-static_cast<std::uint64_t>(
                                                  n < 0)} {}
-  constexpr UnsignedInt128(long long n)
+  constexpr Int128(long long n)
       : low_{static_cast<std::uint64_t>(n)}, high_{-static_cast<std::uint64_t>(
                                                  n < 0)} {}
-  constexpr UnsignedInt128(const UnsignedInt128 &) = default;
-  constexpr UnsignedInt128(UnsignedInt128 &&) = default;
-  constexpr UnsignedInt128 &operator=(const UnsignedInt128 &) = default;
-  constexpr UnsignedInt128 &operator=(UnsignedInt128 &&) = default;
+  constexpr Int128(const Int128 &) = default;
+  constexpr Int128(Int128 &&) = default;
+  constexpr Int128 &operator=(const Int128 &) = default;
+  constexpr Int128 &operator=(Int128 &&) = default;
 
-  constexpr UnsignedInt128 operator+() const { return *this; }
-  constexpr UnsignedInt128 operator~() const { return {~high_, ~low_}; }
-  constexpr UnsignedInt128 operator-() const { return ~*this + 1; }
+  constexpr Int128 operator+() const { return *this; }
+  constexpr Int128 operator~() const { return {~high_, ~low_}; }
+  constexpr Int128 operator-() const { return ~*this + 1; }
   constexpr bool operator!() const { return !low_ && !high_; }
   constexpr explicit operator bool() const { return low_ || high_; }
   constexpr explicit operator std::uint64_t() const { return low_; }
@@ -59,36 +59,36 @@
   constexpr std::uint64_t high() const { return high_; }
   constexpr std::uint64_t low() const { return low_; }
 
-  constexpr UnsignedInt128 operator++(/*prefix*/) {
+  constexpr Int128 operator++(/*prefix*/) {
     *this += 1;
     return *this;
   }
-  constexpr UnsignedInt128 operator++(int /*postfix*/) {
-    UnsignedInt128 result{*this};
+  constexpr Int128 operator++(int /*postfix*/) {
+    Int128 result{*this};
     *this += 1;
     return result;
   }
-  constexpr UnsignedInt128 operator--(/*prefix*/) {
+  constexpr Int128 operator--(/*prefix*/) {
     *this -= 1;
     return *this;
   }
-  constexpr UnsignedInt128 operator--(int /*postfix*/) {
-    UnsignedInt128 result{*this};
+  constexpr Int128 operator--(int /*postfix*/) {
+    Int128 result{*this};
     *this -= 1;
     return result;
   }
 
-  constexpr UnsignedInt128 operator&(UnsignedInt128 that) const {
+  constexpr Int128 operator&(Int128 that) const {
     return {high_ & that.high_, low_ & that.low_};
   }
-  constexpr UnsignedInt128 operator|(UnsignedInt128 that) const {
+  constexpr Int128 operator|(Int128 that) const {
     return {high_ | that.high_, low_ | that.low_};
   }
-  constexpr UnsignedInt128 operator^(UnsignedInt128 that) const {
+  constexpr Int128 operator^(Int128 that) const {
     return {high_ ^ that.high_, low_ ^ that.low_};
   }
 
-  constexpr UnsignedInt128 operator<<(UnsignedInt128 that) const {
+  constexpr Int128 operator<<(Int128 that) const {
     if (that >= 128) {
       return {};
     } else if (that == 0) {
@@ -102,7 +102,7 @@
       }
     }
   }
-  constexpr UnsignedInt128 operator>>(UnsignedInt128 that) const {
+  constexpr Int128 operator>>(Int128 that) const {
     if (that >= 128) {
       return {};
     } else if (that == 0) {
@@ -117,43 +117,41 @@
     }
   }
 
-  constexpr UnsignedInt128 operator+(UnsignedInt128 that) const {
+  constexpr Int128 operator+(Int128 that) const {
     std::uint64_t lower{(low_ & ~topBit) + (that.low_ & ~topBit)};
     bool carry{((lower >> 63) + (low_ >> 63) + (that.low_ >> 63)) > 1};
     return {high_ + that.high_ + carry, low_ + that.low_};
   }
-  constexpr UnsignedInt128 operator-(UnsignedInt128 that) const {
-    return *this + -that;
-  }
+  constexpr Int128 operator-(Int128 that) const { return *this + -that; }
 
-  constexpr UnsignedInt128 operator*(UnsignedInt128 that) const {
+  constexpr Int128 operator*(Int128 that) const {
     std::uint64_t mask32{0xffffffff};
     if (high_ == 0 && that.high_ == 0) {
       std::uint64_t x0{low_ & mask32}, x1{low_ >> 32};
       std::uint64_t y0{that.low_ & mask32}, y1{that.low_ >> 32};
-      UnsignedInt128 x0y0{x0 * y0}, x0y1{x0 * y1};
-      UnsignedInt128 x1y0{x1 * y0}, x1y1{x1 * y1};
+      Int128 x0y0{x0 * y0}, x0y1{x0 * y1};
+      Int128 x1y0{x1 * y0}, x1y1{x1 * y1};
       return x0y0 + ((x0y1 + x1y0) << 32) + (x1y1 << 64);
     } else {
       std::uint64_t x0{low_ & mask32}, x1{low_ >> 32}, x2{high_ & mask32},
           x3{high_ >> 32};
       std::uint64_t y0{that.low_ & mask32}, y1{that.low_ >> 32},
           y2{that.high_ & mask32}, y3{that.high_ >> 32};
-      UnsignedInt128 x0y0{x0 * y0}, x0y1{x0 * y1}, x0y2{x0 * y2}, x0y3{x0 * y3};
-      UnsignedInt128 x1y0{x1 * y0}, x1y1{x1 * y1}, x1y2{x1 * y2};
-      UnsignedInt128 x2y0{x2 * y0}, x2y1{x2 * y1};
-      UnsignedInt128 x3y0{x3 * y0};
+      Int128 x0y0{x0 * y0}, x0y1{x0 * y1}, x0y2{x0 * y2}, x0y3{x0 * y3};
+      Int128 x1y0{x1 * y0}, x1y1{x1 * y1}, x1y2{x1 * y2};
+      Int128 x2y0{x2 * y0}, x2y1{x2 * y1};
+      Int128 x3y0{x3 * y0};
       return x0y0 + ((x0y1 + x1y0) << 32) + ((x0y2 + x1y1 + x2y0) << 64) +
           ((x0y3 + x1y2 + x2y1 + x3y0) << 96);
     }
   }
 
-  constexpr UnsignedInt128 operator/(UnsignedInt128 that) const {
+  constexpr Int128 operator/(Int128 that) const {
     int j{LeadingZeroes()};
-    UnsignedInt128 bits{*this};
+    Int128 bits{*this};
     bits <<= j;
-    UnsignedInt128 numerator{};
-    UnsignedInt128 quotient{};
+    Int128 numerator{};
+    Int128 quotient{};
     for (; j < 128; ++j) {
       numerator <<= 1;
       if (bits.high_ & topBit) {
@@ -169,11 +167,11 @@
     return quotient;
   }
 
-  constexpr UnsignedInt128 operator%(UnsignedInt128 that) const {
+  constexpr Int128 operator%(Int128 that) const {
     int j{LeadingZeroes()};
-    UnsignedInt128 bits{*this};
+    Int128 bits{*this};
     bits <<= j;
-    UnsignedInt128 remainder{};
+    Int128 remainder{};
     for (; j < 128; ++j) {
       remainder <<= 1;
       if (bits.high_ & topBit) {
@@ -187,65 +185,63 @@
     return remainder;
   }
 
-  constexpr bool operator<(UnsignedInt128 that) const {
+  constexpr bool operator<(Int128 that) const {
+    if (IS_SIGNED && (high_ ^ that.high_) & topBit) {
+      return (high_ & topBit) != 0;
+    }
     return high_ < that.high_ || (high_ == that.high_ && low_ < that.low_);
   }
-  constexpr bool operator<=(UnsignedInt128 that) const {
-    return !(*this > that);
-  }
-  constexpr bool operator==(UnsignedInt128 that) const {
+  constexpr bool operator<=(Int128 that) const { return !(*this > that); }
+  constexpr bool operator==(Int128 that) const {
     return low_ == that.low_ && high_ == that.high_;
   }
-  constexpr bool operator!=(UnsignedInt128 that) const {
-    return !(*this == that);
-  }
-  constexpr bool operator>=(UnsignedInt128 that) const { return that <= *this; }
-  constexpr bool operator>(UnsignedInt128 that) const { return that < *this; }
+  constexpr bool operator!=(Int128 that) const { return !(*this == that); }
+  constexpr bool operator>=(Int128 that) const { return that <= *this; }
+  constexpr bool operator>(Int128 that) const { return that < *this; }
 
-  constexpr UnsignedInt128 &operator&=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator&=(const Int128 &that) {
     *this = *this & that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator|=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator|=(const Int128 &that) {
     *this = *this | that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator^=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator^=(const Int128 &that) {
     *this = *this ^ that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator<<=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator<<=(const Int128 &that) {
     *this = *this << that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator>>=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator>>=(const Int128 &that) {
     *this = *this >> that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator+=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator+=(const Int128 &that) {
     *this = *this + that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator-=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator-=(const Int128 &that) {
     *this = *this - that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator*=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator*=(const Int128 &that) {
     *this = *this * that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator/=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator/=(const Int128 &that) {
     *this = *this / that;
     return *this;
   }
-  constexpr UnsignedInt128 &operator%=(const UnsignedInt128 &that) {
+  constexpr Int128 &operator%=(const Int128 &that) {
     *this = *this % that;
     return *this;
   }
 
 private:
-  constexpr UnsignedInt128(std::uint64_t hi, std::uint64_t lo)
-      : low_{lo}, high_{hi} {}
+  constexpr Int128(std::uint64_t hi, std::uint64_t lo) : low_{lo}, high_{hi} {}
   constexpr int LeadingZeroes() const {
     if (high_ == 0) {
       return 64 + LeadingZeroBitCount(low_);
@@ -257,12 +253,16 @@
   std::uint64_t low_{0}, high_{0};
 };
 
-#if AVOID_NATIVE_UINT128_T
-using uint128_t = UnsignedInt128;
-#elif (defined __GNUC__ || defined __clang__) && defined __SIZEOF_INT128__
+using UnsignedInt128 = Int128<false>;
+using SignedInt128 = Int128<true>;
+
+#if !AVOID_NATIVE_UINT128_t && (defined __GNUC__ || defined __clang__) && \
+    defined __SIZEOF_INT128__
 using uint128_t = __uint128_t;
+using int128_t = __int128_t;
 #else
 using uint128_t = UnsignedInt128;
+using int128_t = SignedInt128;
 #endif
 
 template <int BITS> struct HostUnsignedIntTypeHelper {
@@ -271,8 +271,16 @@
           std::conditional_t<(BITS <= 32), std::uint32_t,
               std::conditional_t<(BITS <= 64), std::uint64_t, uint128_t>>>>;
 };
+template <int BITS> struct HostSignedIntTypeHelper {
+  using type = std::conditional_t<(BITS <= 8), std::int8_t,
+      std::conditional_t<(BITS <= 16), std::int16_t,
+          std::conditional_t<(BITS <= 32), std::int32_t,
+              std::conditional_t<(BITS <= 64), std::int64_t, int128_t>>>>;
+};
 template <int BITS>
 using HostUnsignedIntType = typename HostUnsignedIntTypeHelper<BITS>::type;
+template <int BITS>
+using HostSignedIntType = typename HostSignedIntTypeHelper<BITS>::type;
 
 } // namespace Fortran::common
 #endif
diff --git a/flang/include/flang/Decimal/decimal.h b/flang/include/flang/Decimal/decimal.h
--- a/flang/include/flang/Decimal/decimal.h
+++ b/flang/include/flang/Decimal/decimal.h
@@ -129,20 +129,16 @@
 struct NS(ConversionToDecimalResult)
     ConvertDoubleToDecimal(char *, size_t, enum NS(DecimalConversionFlags),
         int digits, enum NS(FortranRounding), double);
-#if __x86_64__ && !defined(_MSC_VER)
 struct NS(ConversionToDecimalResult)
     ConvertLongDoubleToDecimal(char *, size_t, enum NS(DecimalConversionFlags),
         int digits, enum NS(FortranRounding), long double);
-#endif
 
 enum NS(ConversionResultFlags)
     ConvertDecimalToFloat(const char **, float *, enum NS(FortranRounding));
 enum NS(ConversionResultFlags)
     ConvertDecimalToDouble(const char **, double *, enum NS(FortranRounding));
-#if __x86_64__ && !defined(_MSC_VER)
 enum NS(ConversionResultFlags) ConvertDecimalToLongDouble(
     const char **, long double *, enum NS(FortranRounding));
-#endif
 #undef NS
 #ifdef __cplusplus
 } // extern "C"
diff --git a/flang/lib/Decimal/binary-to-decimal.cpp b/flang/lib/Decimal/binary-to-decimal.cpp
--- a/flang/lib/Decimal/binary-to-decimal.cpp
+++ b/flang/lib/Decimal/binary-to-decimal.cpp
@@ -350,14 +350,12 @@
       rounding, Fortran::decimal::BinaryFloatingPointNumber<53>(x));
 }
 
-#if __x86_64__ && !defined(_MSC_VER)
 ConversionToDecimalResult ConvertLongDoubleToDecimal(char *buffer,
     std::size_t size, enum DecimalConversionFlags flags, int digits,
     enum FortranRounding rounding, long double x) {
   return Fortran::decimal::ConvertToDecimal(buffer, size, flags, digits,
       rounding, Fortran::decimal::BinaryFloatingPointNumber<64>(x));
 }
-#endif
 }
 
 template <int PREC, int LOG10RADIX>
diff --git a/flang/lib/Decimal/decimal-to-binary.cpp b/flang/lib/Decimal/decimal-to-binary.cpp
--- a/flang/lib/Decimal/decimal-to-binary.cpp
+++ b/flang/lib/Decimal/decimal-to-binary.cpp
@@ -454,7 +454,6 @@
       reinterpret_cast<const void *>(&result.binary), sizeof *d);
   return result.flags;
 }
-#if __x86_64__ && !defined(_MSC_VER)
 enum ConversionResultFlags ConvertDecimalToLongDouble(
     const char **p, long double *ld, enum FortranRounding rounding) {
   auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)};
@@ -462,6 +461,5 @@
       reinterpret_cast<const void *>(&result.binary), sizeof *ld);
   return result.flags;
 }
-#endif
 }
 } // namespace Fortran::decimal
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -34,6 +34,7 @@
   ISO_Fortran_binding.cpp
   allocatable.cpp
   buffer.cpp
+  complex-reduction.c
   character.cpp
   connection.cpp
   derived.cpp
@@ -50,6 +51,7 @@
   io-stmt.cpp
   main.cpp
   memory.cpp
+  reduction.cpp
   stat.cpp
   stop.cpp
   terminator.cpp
diff --git a/flang/runtime/character.h b/flang/runtime/character.h
--- a/flang/runtime/character.h
+++ b/flang/runtime/character.h
@@ -18,6 +18,16 @@
 
 class Descriptor;
 
+template <typename CHAR>
+int CharacterScalarCompare(
+    const CHAR *x, const CHAR *y, std::size_t xChars, std::size_t yChars);
+extern template int CharacterScalarCompare<char>(
+    const char *x, const char *y, std::size_t xChars, std::size_t yChars);
+extern template int CharacterScalarCompare<char16_t>(const char16_t *x,
+    const char16_t *y, std::size_t xChars, std::size_t yChars);
+extern template int CharacterScalarCompare<char32_t>(const char32_t *x,
+    const char32_t *y, std::size_t xChars, std::size_t yChars);
+
 extern "C" {
 
 // Appends the corresponding (or expanded) characters of 'operand'
diff --git a/flang/runtime/character.cpp b/flang/runtime/character.cpp
--- a/flang/runtime/character.cpp
+++ b/flang/runtime/character.cpp
@@ -7,8 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "character.h"
+#include "cpp-type.h"
 #include "descriptor.h"
 #include "terminator.h"
+#include "tools.h"
 #include "flang/Common/bit-population-count.h"
 #include "flang/Common/uint128.h"
 #include <algorithm>
@@ -30,7 +32,7 @@
 }
 
 template <typename CHAR>
-static int Compare(
+int CharacterScalarCompare(
     const CHAR *x, const CHAR *y, std::size_t xChars, std::size_t yChars) {
   auto minChars{std::min(xChars, yChars)};
   if constexpr (sizeof(CHAR) == 1) {
@@ -63,6 +65,13 @@
   return -CompareToBlankPadding(y, yChars - minChars);
 }
 
+template int CharacterScalarCompare<char>(
+    const char *x, const char *y, std::size_t xChars, std::size_t yChars);
+template int CharacterScalarCompare<char16_t>(const char16_t *x,
+    const char16_t *y, std::size_t xChars, std::size_t yChars);
+template int CharacterScalarCompare<char32_t>(const char32_t *x,
+    const char32_t *y, std::size_t xChars, std::size_t yChars);
+
 // Shift count to use when converting between character lengths
 // and byte counts.
 template <typename CHAR>
@@ -103,8 +112,8 @@
   std::size_t yChars{y.ElementBytes() >> shift<char>};
   for (SubscriptValue resultAt{0}; elements-- > 0;
        ++resultAt, x.IncrementSubscripts(xAt), y.IncrementSubscripts(yAt)) {
-    *result.OffsetElement<char>(resultAt) =
-        Compare(x.Element<CHAR>(xAt), y.Element<CHAR>(yAt), xChars, yChars);
+    *result.OffsetElement<char>(resultAt) = CharacterScalarCompare<CHAR>(
+        x.Element<CHAR>(xAt), y.Element<CHAR>(yAt), xChars, yChars);
   }
 }
 
@@ -216,38 +225,30 @@
     const Terminator &terminator) {
   switch (kind) {
   case 1:
-    LenTrim<std::int8_t, CHAR>(result, string, terminator);
+    LenTrim<CppTypeFor<TypeCategory::Integer, 1>, CHAR>(
+        result, string, terminator);
     break;
   case 2:
-    LenTrim<std::int16_t, CHAR>(result, string, terminator);
+    LenTrim<CppTypeFor<TypeCategory::Integer, 2>, CHAR>(
+        result, string, terminator);
     break;
   case 4:
-    LenTrim<std::int32_t, CHAR>(result, string, terminator);
+    LenTrim<CppTypeFor<TypeCategory::Integer, 4>, CHAR>(
+        result, string, terminator);
     break;
   case 8:
-    LenTrim<std::int64_t, CHAR>(result, string, terminator);
+    LenTrim<CppTypeFor<TypeCategory::Integer, 8>, CHAR>(
+        result, string, terminator);
     break;
   case 16:
-    LenTrim<common::uint128_t, CHAR>(result, string, terminator);
+    LenTrim<CppTypeFor<TypeCategory::Integer, 16>, CHAR>(
+        result, string, terminator);
     break;
   default:
     terminator.Crash("LEN_TRIM: bad KIND=%d", kind);
   }
 }
 
-// Utility for dealing with elemental LOGICAL arguments
-static bool IsLogicalElementTrue(
-    const Descriptor &logical, const SubscriptValue at[]) {
-  // A LOGICAL value is false if and only if all of its bytes are zero.
-  const char *p{logical.Element<char>(at)};
-  for (std::size_t j{logical.ElementBytes()}; j-- > 0; ++p) {
-    if (*p) {
-      return true;
-    }
-  }
-  return false;
-}
-
 // INDEX implementation
 template <typename CHAR>
 inline std::size_t Index(const CHAR *x, std::size_t xLen, const CHAR *want,
@@ -419,23 +420,23 @@
     const Terminator &terminator) {
   switch (kind) {
   case 1:
-    GeneralCharFunc<std::int8_t, CHAR, FUNC>(
+    GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 1>, CHAR, FUNC>(
         result, string, arg, back, terminator);
     break;
   case 2:
-    GeneralCharFunc<std::int16_t, CHAR, FUNC>(
+    GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 2>, CHAR, FUNC>(
         result, string, arg, back, terminator);
     break;
   case 4:
-    GeneralCharFunc<std::int32_t, CHAR, FUNC>(
+    GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 4>, CHAR, FUNC>(
         result, string, arg, back, terminator);
     break;
   case 8:
-    GeneralCharFunc<std::int64_t, CHAR, FUNC>(
+    GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 8>, CHAR, FUNC>(
         result, string, arg, back, terminator);
     break;
   case 16:
-    GeneralCharFunc<common::uint128_t, CHAR, FUNC>(
+    GeneralCharFunc<CppTypeFor<TypeCategory::Integer, 16>, CHAR, FUNC>(
         result, string, arg, back, terminator);
     break;
   default:
@@ -509,7 +510,7 @@
   for (CHAR *result{accumulator.OffsetElement<CHAR>()}; elements-- > 0;
        accumData += accumChars, result += chars, x.IncrementSubscripts(xAt)) {
     const CHAR *xData{x.Element<CHAR>(xAt)};
-    int cmp{Compare(accumData, xData, accumChars, xChars)};
+    int cmp{CharacterScalarCompare(accumData, xData, accumChars, xChars)};
     if constexpr (ISMIN) {
       cmp = -cmp;
     }
@@ -754,14 +755,16 @@
   RUNTIME_CHECK(terminator, x.raw().type == y.raw().type);
   switch (x.raw().type) {
   case CFI_type_char:
-    return Compare(x.OffsetElement<char>(), y.OffsetElement<char>(),
-        x.ElementBytes(), y.ElementBytes());
+    return CharacterScalarCompare<char>(x.OffsetElement<char>(),
+        y.OffsetElement<char>(), x.ElementBytes(), y.ElementBytes());
   case CFI_type_char16_t:
-    return Compare(x.OffsetElement<char16_t>(), y.OffsetElement<char16_t>(),
-        x.ElementBytes() >> 1, y.ElementBytes() >> 1);
+    return CharacterScalarCompare<char16_t>(x.OffsetElement<char16_t>(),
+        y.OffsetElement<char16_t>(), x.ElementBytes() >> 1,
+        y.ElementBytes() >> 1);
   case CFI_type_char32_t:
-    return Compare(x.OffsetElement<char32_t>(), y.OffsetElement<char32_t>(),
-        x.ElementBytes() >> 2, y.ElementBytes() >> 2);
+    return CharacterScalarCompare<char32_t>(x.OffsetElement<char32_t>(),
+        y.OffsetElement<char32_t>(), x.ElementBytes() >> 2,
+        y.ElementBytes() >> 2);
   default:
     terminator.Crash("CharacterCompareScalar: bad string type code %d",
         static_cast<int>(x.raw().type));
@@ -771,17 +774,17 @@
 
 int RTNAME(CharacterCompareScalar1)(
     const char *x, const char *y, std::size_t xChars, std::size_t yChars) {
-  return Compare(x, y, xChars, yChars);
+  return CharacterScalarCompare(x, y, xChars, yChars);
 }
 
 int RTNAME(CharacterCompareScalar2)(const char16_t *x, const char16_t *y,
     std::size_t xChars, std::size_t yChars) {
-  return Compare(x, y, xChars, yChars);
+  return CharacterScalarCompare(x, y, xChars, yChars);
 }
 
 int RTNAME(CharacterCompareScalar4)(const char32_t *x, const char32_t *y,
     std::size_t xChars, std::size_t yChars) {
-  return Compare(x, y, xChars, yChars);
+  return CharacterScalarCompare(x, y, xChars, yChars);
 }
 
 void RTNAME(CharacterCompare)(
diff --git a/flang/runtime/complex-reduction.h b/flang/runtime/complex-reduction.h
new file mode 100644
--- /dev/null
+++ b/flang/runtime/complex-reduction.h
@@ -0,0 +1,51 @@
+/*===-- flang/runtime/complex-reduction.h ---------------------------*- C -*-===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+/* Wraps the C++-coded complex-valued SUM and PRODUCT reductions with
+ * C-coded wrapper functions returning _Complex values, to avoid problems
+ * with C++ build compilers that don't support C's _Complex.
+ */
+
+#ifndef FORTRAN_RUNTIME_COMPLEX_REDUCTION_H_
+#define FORTRAN_RUNTIME_COMPLEX_REDUCTION_H_
+
+#include "entry-names.h"
+
+struct CppDescriptor; /* dummy type name for Fortran::runtime::Descriptor */
+
+#ifdef _MSC_VER
+typedef _Fcomplex float_Complex_t;
+typedef _Dcomplex double_Complex_t;
+typedef _Lcomplex long_double_Complex_t;
+#else
+typedef float _Complex float_Complex_t;
+typedef double _Complex double_Complex_t;
+typedef long double long_double_Complex_t;
+#endif
+
+#define REDUCTION_ARGS \
+  const struct CppDescriptor *x, const char *source, int line, int dim /*=0*/, \
+      const struct CppDescriptor *mask /*=NULL*/
+#define REDUCTION_ARG_NAMES x, source, line, dim, mask
+
+float_Complex_t RTNAME(SumComplex2)(REDUCTION_ARGS);
+float_Complex_t RTNAME(SumComplex3)(REDUCTION_ARGS);
+float_Complex_t RTNAME(SumComplex4)(REDUCTION_ARGS);
+double_Complex_t RTNAME(SumComplex8)(REDUCTION_ARGS);
+long_double_Complex_t RTNAME(SumComplex10)(REDUCTION_ARGS);
+long_double_Complex_t RTNAME(SumComplex16)(REDUCTION_ARGS);
+
+float_Complex_t RTNAME(ProductComplex2)(REDUCTION_ARGS);
+float_Complex_t RTNAME(ProductComplex3)(REDUCTION_ARGS);
+float_Complex_t RTNAME(ProductComplex4)(REDUCTION_ARGS);
+double_Complex_t RTNAME(ProductComplex8)(REDUCTION_ARGS);
+long_double_Complex_t RTNAME(ProductComplex10)(REDUCTION_ARGS);
+long_double_Complex_t RTNAME(ProductComplex16)(REDUCTION_ARGS);
+
+#endif // FORTRAN_RUNTIME_COMPLEX_REDUCTION_H_
diff --git a/flang/runtime/complex-reduction.c b/flang/runtime/complex-reduction.c
new file mode 100644
--- /dev/null
+++ b/flang/runtime/complex-reduction.c
@@ -0,0 +1,68 @@
+/*===-- flang/runtime/complex-reduction.c ---------------------------*- C -*-===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * ===-----------------------------------------------------------------------===
+ */
+
+#include "complex-reduction.h"
+#include "flang/Common/long-double.h"
+#include <complex.h>
+
+/* These are the C standard's names for _Complex constructors; not all C
+ * compilers support them.
+ */
+#if !defined(CMPLXF) && defined(__clang__)
+#define CMPLXF __builtin_complex
+#define CMPLX __builtin_complex
+#define CMPLXL __builtin_complex
+#endif
+
+struct CppComplexFloat {
+  float r, i;
+};
+struct CppComplexDouble {
+  double r, i;
+};
+struct CppComplexLongDouble {
+  long double r, i;
+};
+
+/* RTNAME(SumComplex4) calls RTNAME(CppSumComplex4) with the same arguments
+ * and converts the members of its C++ complex result to C _Complex.
+ */
+
+#define CPP_NAME(name) Cpp##name
+#define ADAPT_REDUCTION(name, cComplex, cpptype, cmplxMacro) \
+  struct cpptype RTNAME(CPP_NAME(name))(struct cpptype *, REDUCTION_ARGS); \
+  cComplex RTNAME(name)(REDUCTION_ARGS) { \
+    struct cpptype result; \
+    RTNAME(CPP_NAME(name))(&result, REDUCTION_ARG_NAMES); \
+    return cmplxMacro(result.r, result.i); \
+  }
+
+/* TODO: COMPLEX(2 & 3) */
+
+/* SUM() */
+ADAPT_REDUCTION(SumComplex4, float_Complex_t, CppComplexFloat, CMPLXF)
+ADAPT_REDUCTION(SumComplex8, double_Complex_t, CppComplexDouble, CMPLX)
+#if LONG_DOUBLE == 80
+ADAPT_REDUCTION(
+    SumComplex10, long_double_Complex_t, CppComplexLongDouble, CMPLXL)
+#elif LONG_DOUBLE == 128
+ADAPT_REDUCTION(
+    SumComplex16, long_double_Complex_t, CppComplexLongDouble, CMPLXL)
+#endif
+
+/* PRODUCT() */
+ADAPT_REDUCTION(ProductComplex4, float_Complex_t, CppComplexFloat, CMPLXF)
+ADAPT_REDUCTION(ProductComplex8, double_Complex_t, CppComplexDouble, CMPLX)
+#if LONG_DOUBLE == 80
+ADAPT_REDUCTION(
+    ProductComplex10, long_double_Complex_t, CppComplexLongDouble, CMPLXL)
+#elif LONG_DOUBLE == 128
+ADAPT_REDUCTION(
+    ProductComplex16, long_double_Complex_t, CppComplexLongDouble, CMPLXL)
+#endif
diff --git a/flang/runtime/cpp-type.h b/flang/runtime/cpp-type.h
new file mode 100644
--- /dev/null
+++ b/flang/runtime/cpp-type.h
@@ -0,0 +1,67 @@
+//===-- runtime/cpp-type.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Maps Fortran intrinsic types to C++ types used in the runtime.
+
+#ifndef FORTRAN_RUNTIME_CPP_TYPE_H_
+#define FORTRAN_RUNTIME_CPP_TYPE_H_
+
+#include "flang/Common/Fortran.h"
+#include "flang/Common/uint128.h"
+#include <complex>
+#include <cstdint>
+
+namespace Fortran::runtime {
+
+using common::TypeCategory;
+
+template <TypeCategory CAT, int KIND> struct CppTypeForHelper {};
+template <TypeCategory CAT, int KIND>
+using CppTypeFor = typename CppTypeForHelper<CAT, KIND>::type;
+
+template <int KIND> struct CppTypeForHelper<TypeCategory::Integer, KIND> {
+  using type = common::HostSignedIntType<8 * KIND>;
+};
+
+// TODO: REAL/COMPLEX(2 & 3)
+template <> struct CppTypeForHelper<TypeCategory::Real, 4> {
+  using type = float;
+};
+template <> struct CppTypeForHelper<TypeCategory::Real, 8> {
+  using type = double;
+};
+template <> struct CppTypeForHelper<TypeCategory::Real, 10> {
+  using type = long double;
+};
+template <> struct CppTypeForHelper<TypeCategory::Real, 16> {
+  using type = long double;
+};
+
+template <int KIND> struct CppTypeForHelper<TypeCategory::Complex, KIND> {
+  using type = std::complex<CppTypeFor<TypeCategory::Real, KIND>>;
+};
+
+template <> struct CppTypeForHelper<TypeCategory::Character, 1> {
+  using type = char;
+};
+template <> struct CppTypeForHelper<TypeCategory::Character, 2> {
+  using type = char16_t;
+};
+template <> struct CppTypeForHelper<TypeCategory::Character, 4> {
+  using type = char32_t;
+};
+
+template <int KIND> struct CppTypeForHelper<TypeCategory::Logical, KIND> {
+  using type = common::HostSignedIntType<8 * KIND>;
+};
+template <> struct CppTypeForHelper<TypeCategory::Logical, 1> {
+  using type = bool;
+};
+
+} // namespace Fortran::runtime
+#endif // FORTRAN_RUNTIME_CPP_TYPE_H_
diff --git a/flang/runtime/descriptor-io.h b/flang/runtime/descriptor-io.h
--- a/flang/runtime/descriptor-io.h
+++ b/flang/runtime/descriptor-io.h
@@ -11,6 +11,7 @@
 
 // Implementation of I/O data list item transfers based on descriptors.
 
+#include "cpp-type.h"
 #include "descriptor.h"
 #include "edit-input.h"
 #include "edit-output.h"
@@ -260,15 +261,20 @@
     case TypeCategory::Integer:
       switch (kind) {
       case 1:
-        return FormattedIntegerIO<std::int8_t, DIR>(io, descriptor);
+        return FormattedIntegerIO<CppTypeFor<TypeCategory::Integer, 1>, DIR>(
+            io, descriptor);
       case 2:
-        return FormattedIntegerIO<std::int16_t, DIR>(io, descriptor);
+        return FormattedIntegerIO<CppTypeFor<TypeCategory::Integer, 2>, DIR>(
+            io, descriptor);
       case 4:
-        return FormattedIntegerIO<std::int32_t, DIR>(io, descriptor);
+        return FormattedIntegerIO<CppTypeFor<TypeCategory::Integer, 4>, DIR>(
+            io, descriptor);
       case 8:
-        return FormattedIntegerIO<std::int64_t, DIR>(io, descriptor);
+        return FormattedIntegerIO<CppTypeFor<TypeCategory::Integer, 8>, DIR>(
+            io, descriptor);
       case 16:
-        return FormattedIntegerIO<common::uint128_t, DIR>(io, descriptor);
+        return FormattedIntegerIO<CppTypeFor<TypeCategory::Integer, 16>, DIR>(
+            io, descriptor);
       default:
         io.GetIoErrorHandler().Crash(
             "DescriptorIO: Unimplemented INTEGER kind (%d) in descriptor",
@@ -330,13 +336,17 @@
     case TypeCategory::Logical:
       switch (kind) {
       case 1:
-        return FormattedLogicalIO<std::int8_t, DIR>(io, descriptor);
+        return FormattedLogicalIO<CppTypeFor<TypeCategory::Integer, 1>, DIR>(
+            io, descriptor);
       case 2:
-        return FormattedLogicalIO<std::int16_t, DIR>(io, descriptor);
+        return FormattedLogicalIO<CppTypeFor<TypeCategory::Integer, 2>, DIR>(
+            io, descriptor);
       case 4:
-        return FormattedLogicalIO<std::int32_t, DIR>(io, descriptor);
+        return FormattedLogicalIO<CppTypeFor<TypeCategory::Integer, 4>, DIR>(
+            io, descriptor);
       case 8:
-        return FormattedLogicalIO<std::int64_t, DIR>(io, descriptor);
+        return FormattedLogicalIO<CppTypeFor<TypeCategory::Integer, 8>, DIR>(
+            io, descriptor);
       default:
         io.GetIoErrorHandler().Crash(
             "DescriptorIO: Unimplemented LOGICAL kind (%d) in descriptor",
diff --git a/flang/runtime/descriptor.h b/flang/runtime/descriptor.h
--- a/flang/runtime/descriptor.h
+++ b/flang/runtime/descriptor.h
@@ -53,6 +53,19 @@
     raw_.extent = upper >= lower ? upper - lower + 1 : 0;
     return *this;
   }
+  Dimension &SetLowerBound(SubscriptValue lower) {
+    raw_.lower_bound = lower;
+    return *this;
+  }
+  Dimension &SetUpperBound(SubscriptValue upper) {
+    auto lower{raw_.lower_bound};
+    raw_.extent = upper >= lower ? upper - lower + 1 : 0;
+    return *this;
+  }
+  Dimension &SetExtent(SubscriptValue extent) {
+    raw_.extent = extent;
+    return *this;
+  }
   Dimension &SetByteStride(SubscriptValue bytes) {
     raw_.sm = bytes;
     return *this;
@@ -137,8 +150,8 @@
     raw_.f18Addendum = false;
   }
   Descriptor(const Descriptor &);
-
   ~Descriptor();
+  Descriptor &operator=(const Descriptor &);
 
   static constexpr std::size_t BytesFor(TypeCategory category, int kind) {
     return category == TypeCategory::Complex ? kind * 2 : kind;
diff --git a/flang/runtime/descriptor.cpp b/flang/runtime/descriptor.cpp
--- a/flang/runtime/descriptor.cpp
+++ b/flang/runtime/descriptor.cpp
@@ -17,9 +17,7 @@
 
 namespace Fortran::runtime {
 
-Descriptor::Descriptor(const Descriptor &that) {
-  std::memcpy(this, &that, that.SizeInBytes());
-}
+Descriptor::Descriptor(const Descriptor &that) { *this = that; }
 
 Descriptor::~Descriptor() {
   if (raw_.attribute != CFI_attribute_pointer) {
@@ -27,6 +25,11 @@
   }
 }
 
+Descriptor &Descriptor::operator=(const Descriptor &that) {
+  std::memcpy(this, &that, that.SizeInBytes());
+  return *this;
+}
+
 void Descriptor::Establish(TypeCode t, std::size_t elementBytes, void *p,
     int rank, const SubscriptValue *extent, ISO::CFI_attribute_t attribute,
     bool addendum) {
@@ -224,10 +227,9 @@
   for (int j{raw_.rank - 1}; j >= 0; --j) {
     int k{permutation ? permutation[j] : j};
     const Dimension &dim{GetDimension(k)};
-    std::size_t quotient{j ? elementNumber / dimCoefficient[j] : 0};
-    subscript[k] =
-        dim.LowerBound() + elementNumber - dimCoefficient[j] * quotient;
-    elementNumber = quotient;
+    std::size_t quotient{elementNumber / dimCoefficient[j]};
+    subscript[k] = quotient + dim.LowerBound();
+    elementNumber -= quotient * dimCoefficient[j];
   }
   return true;
 }
diff --git a/flang/runtime/entry-names.h b/flang/runtime/entry-names.h
--- a/flang/runtime/entry-names.h
+++ b/flang/runtime/entry-names.h
@@ -1,20 +1,21 @@
-//===-- runtime/entry-names.h -----------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// Defines the macro RTNAME(n) which decorates the external name of a runtime
-// library function or object with extra characters so that it
-// (a) is not in the user's name space,
-// (b) doesn't conflict with other libraries, and
-// (c) prevents incompatible versions of the runtime library from linking
-//
-// The value of REVISION should not be changed until/unless the API to the
-// runtime library must change in some way that breaks backward compatibility.
+/*===-- runtime/entry-names.h ---------------------------------------*- C -*-===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===------------------------------------------------------------------------===
+ */
 
+/* Defines the macro RTNAME(n) which decorates the external name of a runtime
+ * library function or object with extra characters so that it
+ * (a) is not in the user's name space,
+ * (b) doesn't conflict with other libraries, and
+ * (c) prevents incompatible versions of the runtime library from linking
+ *
+ * The value of REVISION should not be changed until/unless the API to the
+ * runtime library must change in some way that breaks backward compatibility.
+ */
 #ifndef RTNAME
 #define NAME_WITH_PREFIX_AND_REVISION(prefix, revision, name) \
   prefix##revision##name
diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp
--- a/flang/runtime/io-api.cpp
+++ b/flang/runtime/io-api.cpp
@@ -856,26 +856,6 @@
   return false;
 }
 
-template <typename INT>
-static bool SetInteger(INT &x, int kind, std::int64_t value) {
-  switch (kind) {
-  case 1:
-    reinterpret_cast<std::int8_t &>(x) = value;
-    return true;
-  case 2:
-    reinterpret_cast<std::int16_t &>(x) = value;
-    return true;
-  case 4:
-    reinterpret_cast<std::int32_t &>(x) = value;
-    return true;
-  case 8:
-    reinterpret_cast<std::int64_t &>(x) = value;
-    return true;
-  default:
-    return false;
-  }
-}
-
 bool IONAME(GetNewUnit)(Cookie cookie, int &unit, int kind) {
   IoStatementState &io{*cookie};
   auto *open{io.get_if<OpenStatementState>()};
diff --git a/flang/runtime/reduction.h b/flang/runtime/reduction.h
new file mode 100644
--- /dev/null
+++ b/flang/runtime/reduction.h
@@ -0,0 +1,230 @@
+//===-- runtime/reduction.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Defines the API for the reduction transformational intrinsic functions.
+// (Except the complex-valued total reduction forms of SUM and PRODUCT;
+// the API for those is in complex-reduction.h so that C's _Complex can
+// be used for their return types.)
+
+#ifndef FORTRAN_RUNTIME_REDUCTION_H_
+#define FORTRAN_RUNTIME_REDUCTION_H_
+
+#include "descriptor.h"
+#include "entry-names.h"
+#include "flang/Common/uint128.h"
+#include <complex>
+#include <cstdint>
+
+namespace Fortran::runtime {
+extern "C" {
+
+// Reductions that are known to return scalars have per-type entry
+// points.  These cover the casse that either have no DIM=
+// argument, or have an argument rank of 1.  Pass 0 for no DIM=
+// or the value of the DIM= argument so that it may be checked.
+// The data type in the descriptor is checked against the expected
+// return type.
+//
+// Reductions that return arrays are the remaining cases in which
+// the argument rank is greater than one and there is a DIM=
+// argument present.  These cases establish and allocate their
+// results in a caller-supplied descriptor, which is assumed to
+// be large enough.
+//
+// Complex-valued SUM and PRODUCT reductions have their API
+// entry points defined in complex-reduction.h; these are C wrappers
+// around C++ implementations so as to keep usage of C's _Complex
+// types out of C++ code.
+
+// SUM()
+
+std::int8_t RTNAME(SumInteger1)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int16_t RTNAME(SumInteger2)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int32_t RTNAME(SumInteger4)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int64_t RTNAME(SumInteger8)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+common::int128_t RTNAME(SumInteger16)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+
+// REAL/COMPLEX(2 & 3) return 32-bit float results for the caller to downconvert
+float RTNAME(SumReal2)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(SumReal3)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(SumReal4)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+double RTNAME(SumReal8)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(SumReal10)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(SumReal16)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+
+void RTNAME(CppSumComplex2)(std::complex<float> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppSumComplex3)(std::complex<float> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppSumComplex4)(std::complex<float> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppSumComplex8)(std::complex<double> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppSumComplex10)(std::complex<long double> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppSumComplex16)(std::complex<long double> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+
+void RTNAME(SumDim)(Descriptor &result, const Descriptor &array, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr);
+
+// PRODUCT()
+
+std::int8_t RTNAME(ProductInteger1)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int16_t RTNAME(ProductInteger2)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int32_t RTNAME(ProductInteger4)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int64_t RTNAME(ProductInteger8)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+common::int128_t RTNAME(ProductInteger16)(const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+
+// REAL/COMPLEX(2 & 3) return 32-bit float results for the caller to downconvert
+float RTNAME(ProductReal2)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(ProductReal3)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(ProductReal4)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+double RTNAME(ProductReal8)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(ProductReal10)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(ProductReal16)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+
+void RTNAME(CppProductComplex2)(std::complex<float> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppProductComplex3)(std::complex<float> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppProductComplex4)(std::complex<float> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppProductComplex8)(std::complex<double> &, const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppProductComplex10)(std::complex<long double> &,
+    const Descriptor &, const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+void RTNAME(CppProductComplex16)(std::complex<long double> &,
+    const Descriptor &, const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
+
+void RTNAME(ProductDim)(Descriptor &result, const Descriptor &array, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr);
+
+// MAXLOC and MINLOC
+// These return allocated arrays in the supplied descriptor.
+// The default value for KIND= should be the default INTEGER in effect at
+// compilation time.
+void RTNAME(Maxloc)(Descriptor &, const Descriptor &, int kind,
+    const char *source, int line, const Descriptor *mask = nullptr,
+    bool back = false);
+void RTNAME(MaxlocDim)(Descriptor &, const Descriptor &, int kind, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr,
+    bool back = false);
+void RTNAME(Minloc)(Descriptor &, const Descriptor &, int kind,
+    const char *source, int line, const Descriptor *mask = nullptr,
+    bool back = false);
+void RTNAME(MinlocDim)(Descriptor &, const Descriptor &, int kind, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr,
+    bool back = false);
+
+// MAXVAL and MINVAL
+std::int8_t RTNAME(MaxvalInteger1)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int16_t RTNAME(MaxvalInteger2)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int32_t RTNAME(MaxvalInteger4)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int64_t RTNAME(MaxvalInteger8)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+common::int128_t RTNAME(MaxvalInteger16)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(MaxvalReal2)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(MaxvalReal3)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(MaxvalReal4)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+double RTNAME(MaxvalReal8)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(MaxvalReal10)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(MaxvalReal16)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+void RTNAME(MaxvalCharacter)(Descriptor &, const Descriptor &,
+    const char *source, int line, const Descriptor *mask = nullptr);
+
+std::int8_t RTNAME(MinvalInteger1)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int16_t RTNAME(MinvalInteger2)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int32_t RTNAME(MinvalInteger4)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+std::int64_t RTNAME(MivalInteger8)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+common::int128_t RTNAME(MivalInteger16)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(MinvalReal2)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(MinvalReal3)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+float RTNAME(MinvalReal4)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+double RTNAME(MinvalReal8)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(MinvalReal10)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+long double RTNAME(MinvalReal16)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+void RTNAME(MinvalCharacter)(Descriptor &, const Descriptor &,
+    const char *source, int line, const Descriptor *mask = nullptr);
+
+void RTNAME(MaxvalDim)(Descriptor &, const Descriptor &, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr);
+void RTNAME(MinvalDim)(Descriptor &, const Descriptor &, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr);
+
+// ALL, ANY, & COUNT logical reductions
+bool RTNAME(All)(const Descriptor &, const char *source, int line, int dim = 0);
+void RTNAME(AllDim)(Descriptor &result, const Descriptor &, int dim,
+    const char *source, int line);
+bool RTNAME(Any)(const Descriptor &, const char *source, int line, int dim = 0);
+void RTNAME(AnyDim)(Descriptor &result, const Descriptor &, int dim,
+    const char *source, int line);
+std::int64_t RTNAME(Count)(
+    const Descriptor &, const char *source, int line, int dim = 0);
+void RTNAME(CountDim)(Descriptor &result, const Descriptor &, int dim, int kind,
+    const char *source, int line);
+
+} // extern "C"
+} // namespace Fortran::runtime
+#endif // FORTRAN_RUNTIME_REDUCTION_H_
diff --git a/flang/runtime/reduction.cpp b/flang/runtime/reduction.cpp
new file mode 100644
--- /dev/null
+++ b/flang/runtime/reduction.cpp
@@ -0,0 +1,1525 @@
+//===-- runtime/reduction.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Implements ALL, ANY, COUNT, MAXLOC, MAXVAL, MINLOC, MINVAL, PRODUCT, and SUM
+// for all required operand types and shapes and (for MAXLOC & MINLOC) kinds of
+// results.
+//
+// * Real and complex SUM reductions attempt to reduce floating-point
+//   cancellation on intermediate results by adding up partial sums
+//   for positive and negative elements independently.
+// * Partial reductions (i.e., those with DIM= arguments that are not
+//   required to be 1 by the rank of the argument) return arrays that
+//   are dynamically allocated in a caller-supplied descriptor.
+// * Total reductions (i.e., no DIM= argument) with MAXLOC & MINLOC
+//   return integer vectors of some kind, not scalars; a caller-supplied
+//   descriptor is used
+// * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
+//   length results, dynamically allocated in a caller-supplied descriptor
+
+#include "reduction.h"
+#include "character.h"
+#include "cpp-type.h"
+#include "terminator.h"
+#include "tools.h"
+#include "flang/Common/long-double.h"
+#include <cinttypes>
+#include <complex>
+#include <limits>
+#include <type_traits>
+
+namespace Fortran::runtime {
+
+// Generic reduction templates
+
+// Reductions are implemented with *accumulators*, which are instances of
+// classes that incrementally build up the result (or an element thereof) during
+// a traversal of the unmasked elements of an array.  Each accumulator class
+// supports a constructor (which captures a reference to the array), an
+// AccumulateAt() member function that applies supplied subscripts to the
+// array and does something with a scalar element, and a GetResult()
+// member function that copies a final result into its destination.
+
+// Total reduction of the array argument to a scalar (or to a vector in the
+// cases of MAXLOC & MINLOC).  These are the cases without DIM= or cases
+// where the argument has rank 1 and DIM=, if present, must be 1.
+template <typename TYPE, typename ACCUMULATOR>
+inline void DoTotalReduction(const Descriptor &x, int dim,
+    const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
+    Terminator &terminator) {
+  if (dim < 0 || dim > 1) {
+    terminator.Crash(
+        "%s: bad DIM=%d for argument with rank %d", intrinsic, dim, x.rank());
+  }
+  SubscriptValue xAt[maxRank];
+  x.GetLowerBounds(xAt);
+  if (mask) {
+    CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
+    SubscriptValue maskAt[maxRank];
+    mask->GetLowerBounds(maskAt);
+    if (mask->rank() > 0) {
+      for (auto elements{x.Elements()}; elements--;
+           x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
+        if (IsLogicalElementTrue(*mask, maskAt)) {
+          accumulator.template AccumulateAt<TYPE>(xAt);
+        }
+      }
+      return;
+    } else if (!IsLogicalElementTrue(*mask, maskAt)) {
+      // scalar MASK=.FALSE.: return identity value
+      return;
+    }
+  }
+  // No MASK=, or scalar MASK=.TRUE.
+  for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
+    if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
+      break; // cut short, result is known
+    }
+  }
+}
+
+template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
+inline CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask,
+    ACCUMULATOR &&accumulator, const char *intrinsic) {
+  Terminator terminator{source, line};
+  RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
+  using CppType = CppTypeFor<CAT, KIND>;
+  DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
+  CppType result;
+  accumulator.template GetResult(&result);
+  return result;
+}
+
+// For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
+// of the array is [2,3,5], the shape of the result is [2,5] and
+// result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
+// lower bounds other than one.  This utility subroutine creates an
+// array of subscripts [j,_,k] for result subscripts [j,k] so that the
+// elemets of array(j,:,k) can be reduced.
+inline void GetExpandedSubscripts(SubscriptValue at[],
+    const Descriptor &descriptor, int zeroBasedDim,
+    const SubscriptValue from[]) {
+  descriptor.GetLowerBounds(at);
+  int rank{descriptor.rank()};
+  int j{0};
+  for (; j < zeroBasedDim; ++j) {
+    at[j] += from[j] - 1 /*lower bound*/;
+  }
+  for (++j; j < rank; ++j) {
+    at[j] += from[j - 1] - 1;
+  }
+}
+
+template <typename TYPE, typename ACCUMULATOR>
+inline void ReduceDimToScalar(const Descriptor &x, int zeroBasedDim,
+    SubscriptValue subscripts[], TYPE *result) {
+  ACCUMULATOR accumulator{x};
+  SubscriptValue xAt[maxRank];
+  GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
+  const auto &dim{x.GetDimension(zeroBasedDim)};
+  SubscriptValue at{dim.LowerBound()};
+  for (auto n{dim.Extent()}; n-- > 0; ++at) {
+    xAt[zeroBasedDim] = at;
+    if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
+      break;
+    }
+  }
+  accumulator.template GetResult(result, zeroBasedDim);
+}
+
+template <typename TYPE, typename ACCUMULATOR>
+inline void ReduceDimMaskToScalar(const Descriptor &x, int zeroBasedDim,
+    SubscriptValue subscripts[], const Descriptor &mask, TYPE *result) {
+  ACCUMULATOR accumulator{x};
+  SubscriptValue xAt[maxRank], maskAt[maxRank];
+  GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
+  GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
+  const auto &xDim{x.GetDimension(zeroBasedDim)};
+  SubscriptValue xPos{xDim.LowerBound()};
+  const auto &maskDim{mask.GetDimension(zeroBasedDim)};
+  SubscriptValue maskPos{maskDim.LowerBound()};
+  for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0;
+       ++xPos, ++maskPos) {
+    maskAt[zeroBasedDim] = maskPos;
+    if (IsLogicalElementTrue(mask, maskAt)) {
+      xAt[zeroBasedDim] = xPos;
+      if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
+        break;
+      }
+    }
+  }
+  accumulator.template GetResult(result, zeroBasedDim);
+}
+
+// Utility: establishes & allocates the result array for a partial
+// reduction (i.e., one with DIM=).
+static void CreatePartialReductionResult(Descriptor &result,
+    const Descriptor &x, int dim, Terminator &terminator, const char *intrinsic,
+    TypeCode typeCode) {
+  int xRank{x.rank()};
+  if (dim < 1 || dim > xRank) {
+    terminator.Crash("%s: bad DIM=%d for rank %d", intrinsic, dim, xRank);
+  }
+  int zeroBasedDim{dim - 1};
+  SubscriptValue resultExtent[maxRank];
+  for (int j{0}; j < zeroBasedDim; ++j) {
+    resultExtent[j] = x.GetDimension(j).Extent();
+  }
+  for (int j{zeroBasedDim + 1}; j < xRank; ++j) {
+    resultExtent[j - 1] = x.GetDimension(j).Extent();
+  }
+  result.Establish(typeCode, x.ElementBytes(), nullptr, xRank - 1, resultExtent,
+      CFI_attribute_allocatable);
+  for (int j{0}; j + 1 < xRank; ++j) {
+    result.GetDimension(j).SetBounds(1, resultExtent[j]);
+  }
+  if (int stat{result.Allocate()}) {
+    terminator.Crash(
+        "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
+  }
+}
+
+// Partial reductions with DIM=
+
+template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
+inline void PartialReduction(Descriptor &result, const Descriptor &x, int dim,
+    const Descriptor *mask, Terminator &terminator, const char *intrinsic) {
+  CreatePartialReductionResult(
+      result, x, dim, terminator, intrinsic, TypeCode{CAT, KIND});
+  SubscriptValue at[maxRank];
+  result.GetLowerBounds(at);
+  INTERNAL_CHECK(at[0] == 1);
+  using CppType = CppTypeFor<CAT, KIND>;
+  if (mask) {
+    CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
+    SubscriptValue maskAt[maxRank]; // contents unused
+    if (mask->rank() > 0) {
+      for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+        ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
+            x, dim - 1, at, *mask, result.Element<CppType>(at));
+      }
+      return;
+    } else if (!IsLogicalElementTrue(*mask, maskAt)) {
+      // scalar MASK=.FALSE.
+      ACCUMULATOR accumulator{x};
+      for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+        accumulator.GetResult(result.Element<CppType>(at));
+      }
+      return;
+    }
+  }
+  // No MASK= or scalar MASK=.TRUE.
+  for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+    ReduceDimToScalar<CppType, ACCUMULATOR>(
+        x, dim - 1, at, result.Element<CppType>(at));
+  }
+}
+
+template <template <typename> class INTEGER_ACCUM,
+    template <typename> class REAL_ACCUM,
+    template <typename> class COMPLEX_ACCUM>
+inline void TypedPartialNumericReduction(Descriptor &result,
+    const Descriptor &x, int dim, const char *source, int line,
+    const Descriptor *mask, const char *intrinsic) {
+  Terminator terminator{source, line};
+  auto catKind{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, catKind.has_value());
+  switch (catKind->first) {
+  case TypeCategory::Integer:
+    switch (catKind->second) {
+    case 1:
+      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 4>>,
+          TypeCategory::Integer, 1>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+    case 2:
+      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 4>>,
+          TypeCategory::Integer, 2>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+    case 4:
+      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 4>>,
+          TypeCategory::Integer, 4>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+    case 8:
+      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 8>>,
+          TypeCategory::Integer, 8>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+    case 16:
+      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 16>>,
+          TypeCategory::Integer, 16>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+    }
+    break;
+  case TypeCategory::Real:
+    switch (catKind->second) {
+#if 0 // TODO
+    case 2:
+    case 3:
+#endif
+    case 4:
+      PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
+          TypeCategory::Real, 4>(result, x, dim, mask, terminator, intrinsic);
+      return;
+    case 8:
+      PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
+          TypeCategory::Real, 8>(result, x, dim, mask, terminator, intrinsic);
+      return;
+#if LONG_DOUBLE == 80
+    case 10:
+      PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 10>>,
+          TypeCategory::Real, 10>(result, x, dim, mask, terminator, intrinsic);
+      return;
+#elif LONG_DOUBLE == 128
+    case 16:
+      PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 16>>,
+          TypeCategory::Real, 16>(result, x, dim, mask, terminator, intrinsic);
+      return;
+#endif
+    }
+    break;
+  case TypeCategory::Complex:
+    switch (catKind->second) {
+#if 0 // TODO
+    case 2:
+    case 3:
+#endif
+    case 4:
+      PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
+          TypeCategory::Complex, 4>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+    case 8:
+      PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
+          TypeCategory::Complex, 8>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+#if LONG_DOUBLE == 80
+    case 10:
+      PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 10>>,
+          TypeCategory::Complex, 10>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+#elif LONG_DOUBLE == 128
+    case 16:
+      PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 16>>,
+          TypeCategory::Complex, 16>(
+          result, x, dim, mask, terminator, intrinsic);
+      return;
+#endif
+    }
+    break;
+  default:
+    break;
+  }
+  terminator.Crash("%s: invalid type code %d", intrinsic, x.type().raw());
+}
+
+// SUM()
+
+template <typename INTERMEDIATE> class IntegerSumAccumulator {
+public:
+  explicit IntegerSumAccumulator(const Descriptor &array) : array_{array} {}
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    *p = static_cast<A>(sum_);
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    sum_ += *array_.Element<A>(at);
+    return true;
+  }
+
+private:
+  const Descriptor &array_;
+  INTERMEDIATE sum_{0};
+};
+
+template <typename INTERMEDIATE> class RealSumAccumulator {
+public:
+  explicit RealSumAccumulator(const Descriptor &array) : array_{array} {}
+  template <typename A> A Result() const {
+    auto sum{static_cast<A>(positives_ + negatives_)};
+    return std::isfinite(sum) ? sum : static_cast<A>(inOrder_);
+  }
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    *p = Result<A>();
+  }
+  template <typename A> bool Accumulate(A x) {
+    // Accumulate the nonnegative and negative elements independently
+    // to reduce cancellation; also record an in-order sum for use
+    // in case of overflow.
+    if (x >= 0) {
+      positives_ += x;
+    } else {
+      negatives_ += x;
+    }
+    inOrder_ += x;
+    return true;
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    return Accumulate(*array_.Element<A>(at));
+  }
+
+private:
+  const Descriptor &array_;
+  INTERMEDIATE positives_{0.0}, negatives_{0.0}, inOrder_{0.0};
+};
+
+template <typename PART> class ComplexSumAccumulator {
+public:
+  explicit ComplexSumAccumulator(const Descriptor &array) : array_{array} {}
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    using ResultPart = typename A::value_type;
+    *p = {reals_.template Result<ResultPart>(),
+        imaginaries_.template Result<ResultPart>()};
+  }
+  template <typename A> bool Accumulate(const A &z) {
+    reals_.Accumulate(z.real());
+    imaginaries_.Accumulate(z.imag());
+    return true;
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    return Accumulate(*array_.Element<A>(at));
+  }
+
+private:
+  const Descriptor &array_;
+  RealSumAccumulator<PART> reals_{array_}, imaginaries_{array_};
+};
+
+extern "C" {
+CppTypeFor<TypeCategory::Integer, 1> RTNAME(SumInteger1)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
+      IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "SUM");
+}
+CppTypeFor<TypeCategory::Integer, 2> RTNAME(SumInteger2)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
+      IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "SUM");
+}
+CppTypeFor<TypeCategory::Integer, 4> RTNAME(SumInteger4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
+      IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "SUM");
+}
+CppTypeFor<TypeCategory::Integer, 8> RTNAME(SumInteger8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
+      IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "SUM");
+}
+CppTypeFor<TypeCategory::Integer, 16> RTNAME(SumInteger16)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
+      mask, IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
+      "SUM");
+}
+
+// TODO: real/complex(2 & 3)
+CppTypeFor<TypeCategory::Real, 4> RTNAME(SumReal4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 4>(
+      x, source, line, dim, mask, RealSumAccumulator<double>{x}, "SUM");
+}
+CppTypeFor<TypeCategory::Real, 8> RTNAME(SumReal8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 8>(
+      x, source, line, dim, mask, RealSumAccumulator<double>{x}, "SUM");
+}
+#if LONG_DOUBLE == 80
+CppTypeFor<TypeCategory::Real, 10> RTNAME(SumReal10)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 10>(
+      x, source, line, dim, mask, RealSumAccumulator<long double>{x}, "SUM");
+}
+#elif LONG_DOUBLE == 128
+CppTypeFor<TypeCategory::Real, 16> RTNAME(SumReal16)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 16>(
+      x, source, line, dim, mask, RealSumAccumulator<long double>{x}, "SUM");
+}
+#endif
+
+void RTNAME(CppSumComplex4)(CppTypeFor<TypeCategory::Complex, 4> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 4>(
+      x, source, line, dim, mask, ComplexSumAccumulator<double>{x}, "SUM");
+}
+void RTNAME(CppSumComplex8)(CppTypeFor<TypeCategory::Complex, 8> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 8>(
+      x, source, line, dim, mask, ComplexSumAccumulator<double>{x}, "SUM");
+}
+#if LONG_DOUBLE == 80
+void RTNAME(CppSumComplex10)(CppTypeFor<TypeCategory::Complex, 10> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 10>(
+      x, source, line, dim, mask, ComplexSumAccumulator<long double>{x}, "SUM");
+}
+#elif LONG_DOUBLE == 128
+void RTNAME(CppSumComplex16)(CppTypeFor<TypeCategory::Complex, 16> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 16>(
+      x, source, line, dim, mask, ComplexSumAccumulator<long double>{x}, "SUM");
+}
+#endif
+
+void RTNAME(SumDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask) {
+  TypedPartialNumericReduction<IntegerSumAccumulator, RealSumAccumulator,
+      ComplexSumAccumulator>(result, x, dim, source, line, mask, "SUM");
+}
+} // extern "C"
+
+// PRODUCT()
+
+template <typename INTERMEDIATE> class NonComplexProductAccumulator {
+public:
+  explicit NonComplexProductAccumulator(const Descriptor &array)
+      : array_{array} {}
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    *p = static_cast<A>(product_);
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    product_ *= *array_.Element<A>(at);
+    return product_ != 0;
+  }
+
+private:
+  const Descriptor &array_;
+  INTERMEDIATE product_{1};
+};
+
+template <typename PART> class ComplexProductAccumulator {
+public:
+  explicit ComplexProductAccumulator(const Descriptor &array) : array_{array} {}
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    using ResultPart = typename A::value_type;
+    *p = {static_cast<ResultPart>(product_.real()),
+        static_cast<ResultPart>(product_.imag())};
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    product_ *= *array_.Element<A>(at);
+    return true;
+  }
+
+private:
+  const Descriptor &array_;
+  std::complex<PART> product_{1, 0};
+};
+
+extern "C" {
+CppTypeFor<TypeCategory::Integer, 1> RTNAME(ProductInteger1)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
+      "PRODUCT");
+}
+CppTypeFor<TypeCategory::Integer, 2> RTNAME(ProductInteger2)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
+      "PRODUCT");
+}
+CppTypeFor<TypeCategory::Integer, 4> RTNAME(ProductInteger4)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
+      "PRODUCT");
+}
+CppTypeFor<TypeCategory::Integer, 8> RTNAME(ProductInteger8)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x},
+      "PRODUCT");
+}
+CppTypeFor<TypeCategory::Integer, 16> RTNAME(ProductInteger16)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
+      mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
+      "PRODUCT");
+}
+
+// TODO: real/complex(2 & 3)
+CppTypeFor<TypeCategory::Real, 4> RTNAME(ProductReal4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 4>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
+      "PRODUCT");
+}
+CppTypeFor<TypeCategory::Real, 8> RTNAME(ProductReal8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 8>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
+      "PRODUCT");
+}
+#if LONG_DOUBLE == 80
+CppTypeFor<TypeCategory::Real, 10> RTNAME(ProductReal10)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 10>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 10>>{x},
+      "PRODUCT");
+}
+#elif LONG_DOUBLE == 128
+CppTypeFor<TypeCategory::Real, 16> RTNAME(ProductReal16)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Real, 16>(x, source, line, dim, mask,
+      NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 16>>{x},
+      "PRODUCT");
+}
+#endif
+
+void RTNAME(CppProductComplex4)(CppTypeFor<TypeCategory::Complex, 4> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 4>(x, source, line, dim,
+      mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
+      "PRODUCT");
+}
+void RTNAME(CppProductComplex8)(CppTypeFor<TypeCategory::Complex, 8> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 8>(x, source, line, dim,
+      mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
+      "PRODUCT");
+}
+#if LONG_DOUBLE == 80
+void RTNAME(CppProductComplex10)(CppTypeFor<TypeCategory::Complex, 10> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 10>(x, source, line, dim,
+      mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 10>>{x},
+      "PRODUCT");
+}
+#elif LONG_DOUBLE == 128
+void RTNAME(CppProductComplex16)(CppTypeFor<TypeCategory::Complex, 16> &result,
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  result = GetTotalReduction<TypeCategory::Complex, 16>(x, source, line, dim,
+      mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 16>>{x},
+      "PRODUCT");
+}
+#endif
+
+void RTNAME(ProductDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask) {
+  TypedPartialNumericReduction<NonComplexProductAccumulator,
+      NonComplexProductAccumulator, ComplexProductAccumulator>(
+      result, x, dim, source, line, mask, "PRODUCT");
+}
+} // extern "C"
+
+// MAXLOC and MINLOC
+
+template <typename T, bool IS_MAX, bool BACK> struct NumericCompare {
+  using Type = T;
+  explicit NumericCompare(std::size_t /*elemLen; ignored*/) {}
+  bool operator()(const T &value, const T &previous) {
+    if (BACK && value == previous) {
+      return true;
+    } else if constexpr (IS_MAX) {
+      return value > previous;
+    } else {
+      return value < previous;
+    }
+  }
+};
+
+template <typename T, bool IS_MAX, bool BACK> class CharacterCompare {
+public:
+  using Type = T;
+  explicit CharacterCompare(std::size_t elemLen)
+      : chars_{elemLen / sizeof(T)} {}
+  bool operator()(const T &value, const T &previous) {
+    int cmp{CharacterScalarCompare<T>(&value, &previous, chars_, chars_)};
+    if (BACK && cmp == 0) {
+      return true;
+    } else if constexpr (IS_MAX) {
+      return cmp > 0;
+    } else {
+      return cmp < 0;
+    }
+  }
+
+private:
+  std::size_t chars_;
+};
+
+template <typename COMPARE> class ExtremumLocAccumulator {
+public:
+  using Type = typename COMPARE::Type;
+  ExtremumLocAccumulator(const Descriptor &array, std::size_t chars = 0)
+      : array_{array}, argRank_{array.rank()}, compare_{array.ElementBytes()} {
+    // per standard: result indices are all zero if no data
+    for (int j{0}; j < argRank_; ++j) {
+      extremumLoc_[j] = 0;
+    }
+  }
+  int argRank() const { return argRank_; }
+  template <typename A> void GetResult(A *p, int zeroBasedDim = -1) {
+    if (zeroBasedDim >= 0) {
+      *p = extremumLoc_[zeroBasedDim];
+    } else {
+      for (int j{0}; j < argRank_; ++j) {
+        p[j] = extremumLoc_[j];
+      }
+    }
+  }
+  template <typename IGNORED> bool AccumulateAt(const SubscriptValue at[]) {
+    const auto &value{*array_.Element<Type>(at)};
+    if (!previous_ || compare_(value, *previous_)) {
+      previous_ = &value;
+      for (int j{0}; j < argRank_; ++j) {
+        extremumLoc_[j] = at[j];
+      }
+    }
+    return true;
+  }
+
+private:
+  const Descriptor &array_;
+  int argRank_;
+  SubscriptValue extremumLoc_[maxRank];
+  const Type *previous_{nullptr};
+  COMPARE compare_;
+};
+
+template <typename COMPARE, typename CPPTYPE>
+static void DoMaxOrMinLocHelper(const char *intrinsic, Descriptor &result,
+    const Descriptor &x, int kind, const Descriptor *mask,
+    Terminator &terminator) {
+  ExtremumLocAccumulator<COMPARE> accumulator{x};
+  DoTotalReduction<CPPTYPE>(x, 0, mask, accumulator, intrinsic, terminator);
+  switch (kind) {
+  case 1:
+    accumulator.GetResult(
+        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 1>>());
+    break;
+  case 2:
+    accumulator.GetResult(
+        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 2>>());
+    break;
+  case 4:
+    accumulator.GetResult(
+        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 4>>());
+    break;
+  case 8:
+    accumulator.GetResult(
+        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 8>>());
+    break;
+  case 16:
+    accumulator.GetResult(
+        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 16>>());
+    break;
+  default:
+    terminator.Crash("%s: bad KIND=%d", intrinsic, kind);
+  }
+}
+
+template <TypeCategory CAT, int KIND, bool IS_MAX,
+    template <typename, bool, bool> class COMPARE>
+inline void DoMaxOrMinLoc(const char *intrinsic, Descriptor &result,
+    const Descriptor &x, int kind, const char *source, int line,
+    const Descriptor *mask, bool back) {
+  using CppType = CppTypeFor<CAT, KIND>;
+  Terminator terminator{source, line};
+  if (back) {
+    DoMaxOrMinLocHelper<COMPARE<CppType, IS_MAX, true>, CppType>(
+        intrinsic, result, x, kind, mask, terminator);
+  } else {
+    DoMaxOrMinLocHelper<COMPARE<CppType, IS_MAX, false>, CppType>(
+        intrinsic, result, x, kind, mask, terminator);
+  }
+}
+
+template <bool IS_MAX>
+inline void TypedMaxOrMinLoc(const char *intrinsic, Descriptor &result,
+    const Descriptor &x, int kind, const char *source, int line,
+    const Descriptor *mask, bool back) {
+  int rank{x.rank()};
+  SubscriptValue extent[1]{rank};
+  result.Establish(TypeCategory::Integer, kind, nullptr, 1, extent,
+      CFI_attribute_allocatable);
+  result.GetDimension(0).SetBounds(1, extent[0]);
+  Terminator terminator{source, line};
+  if (int stat{result.Allocate()}) {
+    terminator.Crash(
+        "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
+  }
+  CheckIntegerKind(terminator, kind, intrinsic);
+  auto catKind{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, catKind.has_value());
+  switch (catKind->first) {
+  case TypeCategory::Integer:
+    switch (catKind->second) {
+    case 1:
+      DoMaxOrMinLoc<TypeCategory::Integer, 1, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 2:
+      DoMaxOrMinLoc<TypeCategory::Integer, 2, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 4:
+      DoMaxOrMinLoc<TypeCategory::Integer, 4, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 8:
+      DoMaxOrMinLoc<TypeCategory::Integer, 8, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 16:
+      DoMaxOrMinLoc<TypeCategory::Integer, 16, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    }
+    break;
+  case TypeCategory::Real:
+    switch (catKind->second) {
+    // TODO: REAL(2 & 3)
+    case 4:
+      DoMaxOrMinLoc<TypeCategory::Real, 4, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 8:
+      DoMaxOrMinLoc<TypeCategory::Real, 8, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+#if LONG_DOUBLE == 80
+    case 10:
+      DoMaxOrMinLoc<TypeCategory::Real, 10, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+#elif LONG_DOUBLE == 128
+    case 16:
+      DoMaxOrMinLoc<TypeCategory::Real, 16, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+#endif
+    }
+    break;
+  case TypeCategory::Character:
+    switch (catKind->second) {
+    case 1:
+      DoMaxOrMinLoc<TypeCategory::Character, 1, IS_MAX, CharacterCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 2:
+      DoMaxOrMinLoc<TypeCategory::Character, 2, IS_MAX, CharacterCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    case 4:
+      DoMaxOrMinLoc<TypeCategory::Character, 4, IS_MAX, CharacterCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+      return;
+    }
+    break;
+  default:
+    break;
+  }
+  terminator.Crash(
+      "%s: Bad data type code (%d) for array", intrinsic, x.type().raw());
+}
+
+extern "C" {
+void RTNAME(Maxloc)(Descriptor &result, const Descriptor &x, int kind,
+    const char *source, int line, const Descriptor *mask, bool back) {
+  TypedMaxOrMinLoc<true>("MAXLOC", result, x, kind, source, line, mask, back);
+}
+void RTNAME(Minloc)(Descriptor &result, const Descriptor &x, int kind,
+    const char *source, int line, const Descriptor *mask, bool back) {
+  TypedMaxOrMinLoc<false>("MINLOC", result, x, kind, source, line, mask, back);
+}
+} // extern "C"
+
+// MAXLOC/MINLOC with DIM=
+
+template <TypeCategory CAT, int KIND, bool IS_MAX,
+    template <typename, bool, bool> class COMPARE, bool BACK>
+static void DoPartialMaxOrMinLocDirection(const char *intrinsic,
+    Descriptor &result, const Descriptor &x, int kind, int dim,
+    const Descriptor *mask, Terminator &terminator) {
+  using CppType = CppTypeFor<CAT, KIND>;
+  switch (kind) {
+  case 1:
+    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
+        TypeCategory::Integer, 1>(result, x, dim, mask, terminator, intrinsic);
+    break;
+  case 2:
+    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
+        TypeCategory::Integer, 2>(result, x, dim, mask, terminator, intrinsic);
+    break;
+  case 4:
+    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
+        TypeCategory::Integer, 4>(result, x, dim, mask, terminator, intrinsic);
+    break;
+  case 8:
+    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
+        TypeCategory::Integer, 8>(result, x, dim, mask, terminator, intrinsic);
+    break;
+  case 16:
+    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
+        TypeCategory::Integer, 16>(result, x, dim, mask, terminator, intrinsic);
+    break;
+  default:
+    terminator.Crash("%s: bad KIND=%d", intrinsic, kind);
+  }
+}
+
+template <TypeCategory CAT, int KIND, bool IS_MAX,
+    template <typename, bool, bool> class COMPARE>
+inline void DoPartialMaxOrMinLoc(const char *intrinsic, Descriptor &result,
+    const Descriptor &x, int kind, int dim, const Descriptor *mask, bool back,
+    Terminator &terminator) {
+  if (back) {
+    DoPartialMaxOrMinLocDirection<CAT, KIND, IS_MAX, COMPARE, true>(
+        intrinsic, result, x, kind, dim, mask, terminator);
+  } else {
+    DoPartialMaxOrMinLocDirection<CAT, KIND, IS_MAX, COMPARE, false>(
+        intrinsic, result, x, kind, dim, mask, terminator);
+  }
+}
+
+template <bool IS_MAX>
+inline void TypedPartialMaxOrMinLoc(const char *intrinsic, Descriptor &result,
+    const Descriptor &x, int kind, int dim, const char *source, int line,
+    const Descriptor *mask, bool back) {
+  Terminator terminator{source, line};
+  CheckIntegerKind(terminator, kind, intrinsic);
+  auto catKind{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, catKind.has_value());
+  switch (catKind->first) {
+  case TypeCategory::Integer:
+    switch (catKind->second) {
+    case 1:
+      DoPartialMaxOrMinLoc<TypeCategory::Integer, 1, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 2:
+      DoPartialMaxOrMinLoc<TypeCategory::Integer, 2, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 4:
+      DoPartialMaxOrMinLoc<TypeCategory::Integer, 4, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 8:
+      DoPartialMaxOrMinLoc<TypeCategory::Integer, 8, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 16:
+      DoPartialMaxOrMinLoc<TypeCategory::Integer, 16, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    }
+    break;
+  case TypeCategory::Real:
+    switch (catKind->second) {
+    // TODO: REAL(2 & 3)
+    case 4:
+      DoPartialMaxOrMinLoc<TypeCategory::Real, 4, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 8:
+      DoPartialMaxOrMinLoc<TypeCategory::Real, 8, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+#if LONG_DOUBLE == 80
+    case 10:
+      DoPartialMaxOrMinLoc<TypeCategory::Real, 10, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+#elif LONG_DOUBLE == 128
+    case 16:
+      DoPartialMaxOrMinLoc<TypeCategory::Real, 16, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+#endif
+    }
+    break;
+  case TypeCategory::Character:
+    switch (catKind->second) {
+    case 1:
+      DoPartialMaxOrMinLoc<TypeCategory::Character, 1, IS_MAX,
+          CharacterCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 2:
+      DoPartialMaxOrMinLoc<TypeCategory::Character, 2, IS_MAX,
+          CharacterCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    case 4:
+      DoPartialMaxOrMinLoc<TypeCategory::Character, 4, IS_MAX,
+          CharacterCompare>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+      return;
+    }
+    break;
+  default:
+    break;
+  }
+  terminator.Crash(
+      "%s: Bad data type code (%d) for array", intrinsic, x.type().raw());
+}
+
+extern "C" {
+void RTNAME(MaxlocDim)(Descriptor &result, const Descriptor &x, int kind,
+    int dim, const char *source, int line, const Descriptor *mask, bool back) {
+  TypedPartialMaxOrMinLoc<true>(
+      "MAXLOC", result, x, kind, dim, source, line, mask, back);
+}
+void RTNAME(MinlocDim)(Descriptor &result, const Descriptor &x, int kind,
+    int dim, const char *source, int line, const Descriptor *mask, bool back) {
+  TypedPartialMaxOrMinLoc<false>(
+      "MINLOC", result, x, kind, dim, source, line, mask, back);
+}
+} // extern "C"
+
+// MAXVAL and MINVAL
+
+template <TypeCategory CAT, int KIND, bool IS_MAXVAL> struct MaxOrMinIdentity {
+  using Type = CppTypeFor<CAT, KIND>;
+  static constexpr Type Value() {
+    return IS_MAXVAL ? std::numeric_limits<Type>::lowest()
+                     : std::numeric_limits<Type>::max();
+  }
+};
+
+// std::numeric_limits<> may not know int128_t
+template <bool IS_MAXVAL>
+struct MaxOrMinIdentity<TypeCategory::Integer, 16, IS_MAXVAL> {
+  using Type = CppTypeFor<TypeCategory::Integer, 16>;
+  static constexpr Type Value() {
+    return IS_MAXVAL ? Type{1} << 127 : ~Type{0} >> 1;
+  }
+};
+
+template <TypeCategory CAT, int KIND, bool IS_MAXVAL>
+class NumericExtremumAccumulator {
+public:
+  using Type = CppTypeFor<CAT, KIND>;
+  NumericExtremumAccumulator(const Descriptor &array) : array_{array} {}
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    *p = extremum_;
+  }
+  bool Accumulate(Type x) {
+    if constexpr (IS_MAXVAL) {
+      if (x > extremum_) {
+        extremum_ = x;
+      }
+    } else if (x < extremum_) {
+      extremum_ = x;
+    }
+    return true;
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    return Accumulate(*array_.Element<A>(at));
+  }
+
+private:
+  const Descriptor &array_;
+  Type extremum_{MaxOrMinIdentity<CAT, KIND, IS_MAXVAL>::Value()};
+};
+
+template <TypeCategory CAT, int KIND, bool IS_MAXVAL>
+inline CppTypeFor<CAT, KIND> TotalNumericMaxOrMin(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const char *intrinsic) {
+  return GetTotalReduction<CAT, KIND>(x, source, line, dim, mask,
+      NumericExtremumAccumulator<CAT, KIND, IS_MAXVAL>{x}, intrinsic);
+}
+
+template <TypeCategory CAT, int KIND, bool IS_MAXVAL,
+    template <TypeCategory, int, bool> class ACCUMULATOR>
+static void DoMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
+    const Descriptor *mask, const char *intrinsic, Terminator &terminator) {
+  using Type = CppTypeFor<CAT, KIND>;
+  if (dim == 0 || x.rank() == 1) {
+    // Total reduction
+    result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
+        CFI_attribute_allocatable);
+    if (int stat{result.Allocate()}) {
+      terminator.Crash(
+          "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
+    }
+    ACCUMULATOR<CAT, KIND, IS_MAXVAL> accumulator{x};
+    DoTotalReduction<Type>(x, dim, mask, accumulator, intrinsic, terminator);
+    accumulator.GetResult(result.OffsetElement<Type>());
+  } else {
+    // Partial reduction
+    PartialReduction<ACCUMULATOR<CAT, KIND, IS_MAXVAL>, CAT, KIND>(
+        result, x, dim, mask, terminator, intrinsic);
+  }
+}
+
+template <bool IS_MAXVAL>
+inline void NumericMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask,
+    const char *intrinsic) {
+  Terminator terminator{source, line};
+  auto type{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, type);
+  switch (type->first) {
+  case TypeCategory::Integer:
+    switch (type->second) {
+    case 1:
+      DoMaxOrMin<TypeCategory::Integer, 1, IS_MAXVAL,
+          NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+    case 2:
+      DoMaxOrMin<TypeCategory::Integer, 2, IS_MAXVAL,
+          NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+    case 4:
+      DoMaxOrMin<TypeCategory::Integer, 4, IS_MAXVAL,
+          NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+    case 8:
+      DoMaxOrMin<TypeCategory::Integer, 8, IS_MAXVAL,
+          NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+    case 16:
+      DoMaxOrMin<TypeCategory::Integer, 16, IS_MAXVAL,
+          NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+    }
+    break;
+  case TypeCategory::Real:
+    switch (type->second) {
+    // TODO: REAL(2 & 3)
+    case 4:
+      DoMaxOrMin<TypeCategory::Real, 4, IS_MAXVAL, NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+    case 8:
+      DoMaxOrMin<TypeCategory::Real, 8, IS_MAXVAL, NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+#if LONG_DOUBLE == 80
+    case 10:
+      DoMaxOrMin<TypeCategory::Real, 10, IS_MAXVAL, NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+#elif LONG_DOUBLE == 128
+    case 16:
+      DoMaxOrMin<TypeCategory::Real, 16, IS_MAXVAL, NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+      return;
+#endif
+    }
+    break;
+  default:
+    break;
+  }
+  terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
+}
+
+template <TypeCategory, int KIND, bool IS_MAXVAL>
+class CharacterExtremumAccumulator {
+public:
+  using Type = CppTypeFor<TypeCategory::Character, KIND>;
+  CharacterExtremumAccumulator(const Descriptor &array)
+      : array_{array}, charLen_{array_.ElementBytes() / KIND} {}
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    static_assert(std::is_same_v<A, Type>);
+    if (extremum_) {
+      std::memcpy(p, extremum_, charLen_);
+    } else {
+      // empty array: result is all zero-valued characters
+      std::memset(p, 0, charLen_);
+    }
+  }
+  bool Accumulate(const Type *x) {
+    if (!extremum_) {
+      extremum_ = x;
+    } else {
+      int cmp{CharacterScalarCompare(x, extremum_, charLen_, charLen_)};
+      if (IS_MAXVAL == (cmp > 0)) {
+        extremum_ = x;
+      }
+    }
+    return true;
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    return Accumulate(array_.Element<A>(at));
+  }
+
+private:
+  const Descriptor &array_;
+  std::size_t charLen_;
+  const Type *extremum_{nullptr};
+};
+
+template <bool IS_MAXVAL>
+inline void CharacterMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask,
+    const char *intrinsic) {
+  Terminator terminator{source, line};
+  auto type{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, type && type->first == TypeCategory::Character);
+  switch (type->second) {
+  case 1:
+    DoMaxOrMin<TypeCategory::Character, 1, IS_MAXVAL,
+        CharacterExtremumAccumulator>(
+        result, x, dim, mask, intrinsic, terminator);
+    break;
+  case 2:
+    DoMaxOrMin<TypeCategory::Character, 2, IS_MAXVAL,
+        CharacterExtremumAccumulator>(
+        result, x, dim, mask, intrinsic, terminator);
+    break;
+  case 4:
+    DoMaxOrMin<TypeCategory::Character, 4, IS_MAXVAL,
+        CharacterExtremumAccumulator>(
+        result, x, dim, mask, intrinsic, terminator);
+    break;
+  default:
+    terminator.Crash("%s: bad character kind %d", intrinsic, type->second);
+  }
+}
+
+extern "C" {
+CppTypeFor<TypeCategory::Integer, 1> RTNAME(MaxvalInteger1)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 1, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+CppTypeFor<TypeCategory::Integer, 2> RTNAME(MaxvalInteger2)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 2, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+CppTypeFor<TypeCategory::Integer, 4> RTNAME(MaxvalInteger4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 4, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+CppTypeFor<TypeCategory::Integer, 8> RTNAME(MaxvalInteger8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 8, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+CppTypeFor<TypeCategory::Integer, 16> RTNAME(MaxvalInteger16)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 16, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+
+// TODO: REAL(2 & 3)
+CppTypeFor<TypeCategory::Real, 4> RTNAME(MaxvalReal4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 4, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+CppTypeFor<TypeCategory::Real, 8> RTNAME(MaxvalReal8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 8, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+#if LONG_DOUBLE == 80
+CppTypeFor<TypeCategory::Real, 10> RTNAME(MaxvalReal10)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 10, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+#elif LONG_DOUBLE == 128
+CppTypeFor<TypeCategory::Real, 16> RTNAME(MaxvalReal16)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 16, true>(
+      x, source, line, dim, mask, "MAXVAL");
+}
+#endif
+
+void RTNAME(MaxvalCharacter)(Descriptor &result, const Descriptor &x,
+    const char *source, int line, const Descriptor *mask) {
+  CharacterMaxOrMin<true>(result, x, 0, source, line, mask, "MAXVAL");
+}
+
+CppTypeFor<TypeCategory::Integer, 1> RTNAME(MinvalInteger1)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 1, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+CppTypeFor<TypeCategory::Integer, 2> RTNAME(MinvalInteger2)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 2, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+CppTypeFor<TypeCategory::Integer, 4> RTNAME(MinvalInteger4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 4, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+CppTypeFor<TypeCategory::Integer, 8> RTNAME(MinvalInteger8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 8, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+CppTypeFor<TypeCategory::Integer, 16> RTNAME(MinvalInteger16)(
+    const Descriptor &x, const char *source, int line, int dim,
+    const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Integer, 16, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+
+// TODO: REAL(2 & 3)
+CppTypeFor<TypeCategory::Real, 4> RTNAME(MinvalReal4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 4, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+CppTypeFor<TypeCategory::Real, 8> RTNAME(MinvalReal8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 8, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+#if LONG_DOUBLE == 80
+CppTypeFor<TypeCategory::Real, 10> RTNAME(MinvalReal10)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 10, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+#elif LONG_DOUBLE == 128
+CppTypeFor<TypeCategory::Real, 16> RTNAME(MinvalReal16)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return TotalNumericMaxOrMin<TypeCategory::Real, 16, false>(
+      x, source, line, dim, mask, "MINVAL");
+}
+#endif
+
+void RTNAME(MinvalCharacter)(Descriptor &result, const Descriptor &x,
+    const char *source, int line, const Descriptor *mask) {
+  CharacterMaxOrMin<false>(result, x, 0, source, line, mask, "MINVAL");
+}
+
+void RTNAME(MaxvalDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask) {
+  if (x.type().IsCharacter()) {
+    CharacterMaxOrMin<true>(result, x, dim, source, line, mask, "MAXVAL");
+  } else {
+    NumericMaxOrMin<true>(result, x, dim, source, line, mask, "MAXVAL");
+  }
+}
+void RTNAME(MinvalDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask) {
+  if (x.type().IsCharacter()) {
+    CharacterMaxOrMin<false>(result, x, dim, source, line, mask, "MINVAL");
+  } else {
+    NumericMaxOrMin<false>(result, x, dim, source, line, mask, "MINVAL");
+  }
+}
+
+} // extern "C"
+
+// ALL, ANY, & COUNT
+
+template <bool IS_ALL> class LogicalAccumulator {
+public:
+  using Type = bool;
+  explicit LogicalAccumulator(const Descriptor &array) : array_{array} {}
+  bool Result() const { return result_; }
+  bool Accumulate(bool x) {
+    if (x == IS_ALL) {
+      return true;
+    } else {
+      result_ = x;
+      return false;
+    }
+  }
+  template <typename IGNORED = void>
+  bool AccumulateAt(const SubscriptValue at[]) {
+    return Accumulate(IsLogicalElementTrue(array_, at));
+  }
+
+private:
+  const Descriptor &array_;
+  bool result_{IS_ALL};
+};
+
+template <typename ACCUMULATOR>
+inline auto GetTotalLogicalReduction(const Descriptor &x, const char *source,
+    int line, int dim, ACCUMULATOR &&accumulator, const char *intrinsic) ->
+    typename ACCUMULATOR::Type {
+  Terminator terminator{source, line};
+  if (dim < 0 || dim > 1) {
+    terminator.Crash("%s: bad DIM=%d", intrinsic, dim);
+  }
+  SubscriptValue xAt[maxRank];
+  x.GetLowerBounds(xAt);
+  for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
+    if (!accumulator.AccumulateAt(xAt)) {
+      break; // cut short, result is known
+    }
+  }
+  return accumulator.Result();
+}
+
+template <typename ACCUMULATOR>
+inline auto ReduceLogicalDimToScalar(const Descriptor &x, int zeroBasedDim,
+    SubscriptValue subscripts[]) -> typename ACCUMULATOR::Type {
+  ACCUMULATOR accumulator{x};
+  SubscriptValue xAt[maxRank];
+  GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
+  const auto &dim{x.GetDimension(zeroBasedDim)};
+  SubscriptValue at{dim.LowerBound()};
+  for (auto n{dim.Extent()}; n-- > 0; ++at) {
+    xAt[zeroBasedDim] = at;
+    if (!accumulator.AccumulateAt(xAt)) {
+      break;
+    }
+  }
+  return accumulator.Result();
+}
+
+template <bool IS_ALL, int KIND>
+inline void ReduceLogicalDimension(Descriptor &result, const Descriptor &x,
+    int dim, Terminator &terminator, const char *intrinsic) {
+  // Standard requires result to have same LOGICAL kind as argument.
+  CreatePartialReductionResult(result, x, dim, terminator, intrinsic, x.type());
+  SubscriptValue at[maxRank];
+  result.GetLowerBounds(at);
+  INTERNAL_CHECK(at[0] == 1);
+  using CppType = CppTypeFor<TypeCategory::Logical, KIND>;
+  for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+    *result.Element<CppType>(at) =
+        ReduceLogicalDimToScalar<LogicalAccumulator<IS_ALL>>(x, dim - 1, at);
+  }
+}
+
+template <bool IS_ALL>
+inline void DoReduceLogicalDimension(Descriptor &result, const Descriptor &x,
+    int dim, Terminator &terminator, const char *intrinsic) {
+  auto catKind{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, catKind && catKind->first == TypeCategory::Logical);
+  switch (catKind->second) {
+  case 1:
+    ReduceLogicalDimension<IS_ALL, 1>(result, x, dim, terminator, intrinsic);
+    break;
+  case 2:
+    ReduceLogicalDimension<IS_ALL, 2>(result, x, dim, terminator, intrinsic);
+    break;
+  case 4:
+    ReduceLogicalDimension<IS_ALL, 4>(result, x, dim, terminator, intrinsic);
+    break;
+  case 8:
+    ReduceLogicalDimension<IS_ALL, 8>(result, x, dim, terminator, intrinsic);
+    break;
+  default:
+    terminator.Crash(
+        "%s: bad argument type LOGICAL(%d)", intrinsic, catKind->second);
+  }
+}
+
+// COUNT
+
+class CountAccumulator {
+public:
+  using Type = std::int64_t;
+  explicit CountAccumulator(const Descriptor &array) : array_{array} {}
+  Type Result() const { return result_; }
+  template <typename IGNORED = void>
+  bool AccumulateAt(const SubscriptValue at[]) {
+    if (IsLogicalElementTrue(array_, at)) {
+      ++result_;
+    }
+    return true;
+  }
+
+private:
+  const Descriptor &array_;
+  Type result_{0};
+};
+
+template <int KIND>
+inline void CountDimension(
+    Descriptor &result, const Descriptor &x, int dim, Terminator &terminator) {
+  CreatePartialReductionResult(result, x, dim, terminator, "COUNT",
+      TypeCode{TypeCategory::Integer, KIND});
+  SubscriptValue at[maxRank];
+  result.GetLowerBounds(at);
+  INTERNAL_CHECK(at[0] == 1);
+  using CppType = CppTypeFor<TypeCategory::Integer, KIND>;
+  for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+    *result.Element<CppType>(at) =
+        ReduceLogicalDimToScalar<CountAccumulator>(x, dim - 1, at);
+  }
+}
+
+extern "C" {
+
+bool RTNAME(All)(const Descriptor &x, const char *source, int line, int dim) {
+  return GetTotalLogicalReduction(
+      x, source, line, dim, LogicalAccumulator<true>{x}, "ALL");
+}
+void RTNAME(AllDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line) {
+  Terminator terminator{source, line};
+  DoReduceLogicalDimension<true>(result, x, dim, terminator, "ALL");
+}
+
+bool RTNAME(Any)(const Descriptor &x, const char *source, int line, int dim) {
+  return GetTotalLogicalReduction(
+      x, source, line, dim, LogicalAccumulator<false>{x}, "ANY");
+}
+void RTNAME(AnyDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line) {
+  Terminator terminator{source, line};
+  DoReduceLogicalDimension<false>(result, x, dim, terminator, "ANY");
+}
+
+std::int64_t RTNAME(Count)(
+    const Descriptor &x, const char *source, int line, int dim) {
+  return GetTotalLogicalReduction(
+      x, source, line, dim, CountAccumulator{x}, "COUNT");
+}
+void RTNAME(CountDim)(Descriptor &result, const Descriptor &x, int dim,
+    int kind, const char *source, int line) {
+  Terminator terminator{source, line};
+  switch (kind) {
+  case 1:
+    CountDimension<1>(result, x, dim, terminator);
+    break;
+  case 2:
+    CountDimension<2>(result, x, dim, terminator);
+    break;
+  case 4:
+    CountDimension<4>(result, x, dim, terminator);
+    break;
+  case 8:
+    CountDimension<8>(result, x, dim, terminator);
+    break;
+  case 16:
+    CountDimension<16>(result, x, dim, terminator);
+    break;
+  default:
+    terminator.Crash("COUNT: bad KIND=%d", kind);
+  }
+}
+
+} // extern "C"
+} // namespace Fortran::runtime
diff --git a/flang/runtime/tools.h b/flang/runtime/tools.h
--- a/flang/runtime/tools.h
+++ b/flang/runtime/tools.h
@@ -9,7 +9,10 @@
 #ifndef FORTRAN_RUNTIME_TOOLS_H_
 #define FORTRAN_RUNTIME_TOOLS_H_
 
+#include "cpp-type.h"
+#include "descriptor.h"
 #include "memory.h"
+#include "terminator.h"
 #include <functional>
 #include <map>
 #include <type_traits>
@@ -33,5 +36,70 @@
 // Truncates or pads as necessary
 void ToFortranDefaultCharacter(
     char *to, std::size_t toLength, const char *from);
+
+// Utility for dealing with elemental LOGICAL arguments
+inline bool IsLogicalElementTrue(
+    const Descriptor &logical, const SubscriptValue at[]) {
+  // A LOGICAL value is false if and only if all of its bytes are zero.
+  const char *p{logical.Element<char>(at)};
+  for (std::size_t j{logical.ElementBytes()}; j-- > 0; ++p) {
+    if (*p) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Check array conformability; a scalar 'x' conforms.  Crashes on error.
+void CheckConformability(const Descriptor &to, const Descriptor &x,
+    Terminator &, const char *funcName, const char *toName,
+    const char *fromName);
+
+// Validate a KIND= argument
+void CheckIntegerKind(Terminator &, int kind, const char *intrinsic);
+
+template <typename TO, typename FROM>
+inline void PutContiguousConverted(TO *to, FROM *from, std::size_t count) {
+  while (count-- > 0) {
+    *to++ = *from++;
+  }
+}
+
+static inline std::int64_t GetInt64(const char *p, std::size_t bytes) {
+  switch (bytes) {
+  case 1:
+    return *reinterpret_cast<const CppTypeFor<TypeCategory::Integer, 1> *>(p);
+  case 2:
+    return *reinterpret_cast<const CppTypeFor<TypeCategory::Integer, 2> *>(p);
+  case 4:
+    return *reinterpret_cast<const CppTypeFor<TypeCategory::Integer, 4> *>(p);
+  case 8:
+    return *reinterpret_cast<const CppTypeFor<TypeCategory::Integer, 8> *>(p);
+  default:
+    Terminator{__FILE__, __LINE__}.Crash(
+        "GetInt64: no case for %zd bytes", bytes);
+  }
+}
+
+template <typename INT>
+inline bool SetInteger(INT &x, int kind, std::int64_t value) {
+  switch (kind) {
+  case 1:
+    reinterpret_cast<CppTypeFor<TypeCategory::Integer, 1> &>(x) = value;
+    return true;
+  case 2:
+    reinterpret_cast<CppTypeFor<TypeCategory::Integer, 2> &>(x) = value;
+    return true;
+  case 4:
+    reinterpret_cast<CppTypeFor<TypeCategory::Integer, 4> &>(x) = value;
+    return true;
+  case 8:
+    reinterpret_cast<CppTypeFor<TypeCategory::Integer, 8> &>(x) = value;
+    return true;
+  default:
+    return false;
+  }
+}
+
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_TOOLS_H_
diff --git a/flang/runtime/tools.cpp b/flang/runtime/tools.cpp
--- a/flang/runtime/tools.cpp
+++ b/flang/runtime/tools.cpp
@@ -7,7 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "tools.h"
+#include "terminator.h"
 #include <algorithm>
+#include <cstdint>
 #include <cstring>
 
 namespace Fortran::runtime {
@@ -75,4 +77,34 @@
   }
 }
 
+void CheckConformability(const Descriptor &to, const Descriptor &x,
+    Terminator &terminator, const char *funcName, const char *toName,
+    const char *xName) {
+  if (x.rank() == 0) {
+    return; // scalar conforms with anything
+  }
+  int rank{to.rank()};
+  if (x.rank() != rank) {
+    terminator.Crash(
+        "Incompatible array arguments to %s: %s has rank %d but %s has rank %d",
+        funcName, toName, rank, xName, x.rank());
+  } else {
+    for (int j{0}; j < rank; ++j) {
+      auto toExtent{static_cast<std::int64_t>(to.GetDimension(j).Extent())};
+      auto xExtent{static_cast<std::int64_t>(x.GetDimension(j).Extent())};
+      if (xExtent != toExtent) {
+        terminator.Crash("Incompatible array arguments to %s: dimension %d of "
+                         "%s has extent %" PRId64 " but %s has extent %" PRId64,
+            funcName, j, toName, toExtent, xName, xExtent);
+      }
+    }
+  }
+}
+
+void CheckIntegerKind(Terminator &terminator, int kind, const char *intrinsic) {
+  if (kind < 1 || kind > 16 || (kind & (kind - 1)) != 0) {
+    terminator.Crash("%s: bad KIND=%d argument", intrinsic, kind);
+  }
+}
+
 } // namespace Fortran::runtime
diff --git a/flang/runtime/transformational.h b/flang/runtime/transformational.h
--- a/flang/runtime/transformational.h
+++ b/flang/runtime/transformational.h
@@ -10,11 +10,13 @@
 #define FORTRAN_RUNTIME_TRANSFORMATIONAL_H_
 
 #include "descriptor.h"
+#include "entry-names.h"
 #include "memory.h"
 
 namespace Fortran::runtime {
 
-OwningPtr<Descriptor> RESHAPE(const Descriptor &source, const Descriptor &shape,
-    const Descriptor *pad = nullptr, const Descriptor *order = nullptr);
+OwningPtr<Descriptor> RTNAME(Reshape)(const Descriptor &source,
+    const Descriptor &shape, const Descriptor *pad = nullptr,
+    const Descriptor *order = nullptr);
 }
 #endif // FORTRAN_RUNTIME_TRANSFORMATIONAL_H_
diff --git a/flang/runtime/transformational.cpp b/flang/runtime/transformational.cpp
--- a/flang/runtime/transformational.cpp
+++ b/flang/runtime/transformational.cpp
@@ -7,32 +7,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "transformational.h"
-#include "memory.h"
 #include "terminator.h"
+#include "tools.h"
 #include <algorithm>
 #include <cinttypes>
 
 namespace Fortran::runtime {
 
-static inline std::int64_t GetInt64(const char *p, std::size_t bytes) {
-  switch (bytes) {
-  case 1:
-    return *reinterpret_cast<const std::int8_t *>(p);
-  case 2:
-    return *reinterpret_cast<const std::int16_t *>(p);
-  case 4:
-    return *reinterpret_cast<const std::int32_t *>(p);
-  case 8:
-    return *reinterpret_cast<const std::int64_t *>(p);
-  default:
-    Terminator terminator{__FILE__, __LINE__};
-    terminator.Crash("no case for %dz bytes", bytes);
-  }
-}
-
 // F2018 16.9.163
-OwningPtr<Descriptor> RESHAPE(const Descriptor &source, const Descriptor &shape,
-    const Descriptor *pad, const Descriptor *order) {
+OwningPtr<Descriptor> RTNAME(Reshape)(const Descriptor &source,
+    const Descriptor &shape, const Descriptor *pad, const Descriptor *order) {
   // Compute and check the rank of the result.
   Terminator terminator{__FILE__, __LINE__};
   RUNTIME_CHECK(terminator, shape.rank() == 1);
diff --git a/flang/runtime/type-code.cpp b/flang/runtime/type-code.cpp
--- a/flang/runtime/type-code.cpp
+++ b/flang/runtime/type-code.cpp
@@ -78,13 +78,13 @@
       raw_ = CFI_type_Bool;
       break;
     case 2:
-      raw_ = CFI_type_int_fast16_t;
+      raw_ = CFI_type_int_least16_t;
       break;
     case 4:
-      raw_ = CFI_type_int_fast32_t;
+      raw_ = CFI_type_int_least32_t;
       break;
     case 8:
-      raw_ = CFI_type_int_fast64_t;
+      raw_ = CFI_type_int_least64_t;
       break;
     }
     break;
@@ -135,13 +135,13 @@
     return std::make_pair(TypeCategory::Character, 4);
   case CFI_type_Bool:
     return std::make_pair(TypeCategory::Logical, 1);
-  case CFI_type_int_fast8_t:
+  case CFI_type_int_least8_t:
     return std::make_pair(TypeCategory::Logical, 1);
-  case CFI_type_int_fast16_t:
+  case CFI_type_int_least16_t:
     return std::make_pair(TypeCategory::Logical, 2);
-  case CFI_type_int_fast32_t:
+  case CFI_type_int_least32_t:
     return std::make_pair(TypeCategory::Logical, 4);
-  case CFI_type_int_fast64_t:
+  case CFI_type_int_least64_t:
     return std::make_pair(TypeCategory::Logical, 8);
   case CFI_type_struct:
     return std::make_pair(TypeCategory::Derived, 0);
diff --git a/flang/unittests/Evaluate/reshape.cpp b/flang/unittests/Evaluate/reshape.cpp
--- a/flang/unittests/Evaluate/reshape.cpp
+++ b/flang/unittests/Evaluate/reshape.cpp
@@ -52,7 +52,7 @@
   MATCH(2, pad.GetDimension(1).Extent());
   MATCH(3, pad.GetDimension(2).Extent());
 
-  auto result{RESHAPE(*source, *shape, &pad)};
+  auto result{RTNAME(Reshape)(*source, *shape, &pad)};
   TEST(result.get() != nullptr);
   result->Check();
   MATCH(sizeof(std::int32_t), result->ElementBytes());
diff --git a/flang/unittests/RuntimeGTest/CMakeLists.txt b/flang/unittests/RuntimeGTest/CMakeLists.txt
--- a/flang/unittests/RuntimeGTest/CMakeLists.txt
+++ b/flang/unittests/RuntimeGTest/CMakeLists.txt
@@ -1,10 +1,9 @@
 add_flang_unittest(FlangRuntimeTests
   CharacterTest.cpp
-  RuntimeCrashTest.cpp
   CrashHandlerFixture.cpp
   NumericalFormatTest.cpp
+  Reduction.cpp
   RuntimeCrashTest.cpp
-  CrashHandlerFixture.cpp
 )
 
 target_link_libraries(FlangRuntimeTests
diff --git a/flang/unittests/RuntimeGTest/Reduction.cpp b/flang/unittests/RuntimeGTest/Reduction.cpp
new file mode 100644
--- /dev/null
+++ b/flang/unittests/RuntimeGTest/Reduction.cpp
@@ -0,0 +1,296 @@
+//===-- flang/unittests/RuntimeGTest/Reductions.cpp -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../runtime/reduction.h"
+#include "gtest/gtest.h"
+#include "../../runtime/allocatable.h"
+#include "../../runtime/cpp-type.h"
+#include "../../runtime/descriptor.h"
+#include "../../runtime/type-code.h"
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <vector>
+
+using namespace Fortran::runtime;
+using Fortran::common::TypeCategory;
+
+template <typename A>
+static void StoreElement(void *p, const A &x, std::size_t bytes) {
+  std::memcpy(p, &x, bytes);
+}
+
+template <typename CHAR>
+static void StoreElement(
+    void *p, const std::basic_string<CHAR> &str, std::size_t bytes) {
+  ASSERT_LE(bytes, sizeof(CHAR) * str.size());
+  std::memcpy(p, str.data(), bytes);
+}
+
+template <TypeCategory CAT, int KIND, typename A>
+static OwningPtr<Descriptor> MakeArray(const std::vector<int> &shape,
+    const std::vector<A> &data, std::size_t elemLen = KIND) {
+  auto rank{static_cast<int>(shape.size())};
+  auto result{Descriptor::Create(TypeCode{CAT, KIND}, elemLen, nullptr, rank,
+      nullptr, CFI_attribute_allocatable)};
+  for (int j{0}; j < rank; ++j) {
+    result->GetDimension(j).SetBounds(1, shape[j]);
+  }
+  int stat{result->Allocate()};
+  EXPECT_EQ(stat, 0) << stat;
+  EXPECT_LE(data.size(), result->Elements());
+  char *p{result->OffsetElement<char>()};
+  for (const auto &x : data) {
+    StoreElement(p, x, elemLen);
+    p += elemLen;
+  }
+  return result;
+}
+
+TEST(Reductions, SumInt4) {
+  auto array{MakeArray<TypeCategory::Integer, 4>(
+      std::vector<int>{2, 3}, std::vector<std::int32_t>{1, 2, 3, 4, 5, 6})};
+  std::int32_t sum{RTNAME(SumInteger4)(*array, __FILE__, __LINE__)};
+  EXPECT_EQ(sum, 21) << sum;
+}
+
+TEST(Reductions, DimMaskProductInt4) {
+  std::vector<int> shape{2, 3};
+  auto array{MakeArray<TypeCategory::Integer, 4>(
+      shape, std::vector<std::int32_t>{1, 2, 3, 4, 5, 6})};
+  auto mask{MakeArray<TypeCategory::Logical, 1>(
+      shape, std::vector<bool>{true, false, false, true, true, true})};
+  StaticDescriptor<1> statDesc;
+  Descriptor &prod{statDesc.descriptor()};
+  RTNAME(ProductDim)(prod, *array, 1, __FILE__, __LINE__, &*mask);
+  EXPECT_EQ(prod.rank(), 1);
+  EXPECT_EQ(prod.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(prod.GetDimension(0).Extent(), 3);
+  EXPECT_EQ(*prod.ZeroBasedIndexedElement<std::int32_t>(0), 1);
+  EXPECT_EQ(*prod.ZeroBasedIndexedElement<std::int32_t>(1), 4);
+  EXPECT_EQ(*prod.ZeroBasedIndexedElement<std::int32_t>(2), 30);
+  EXPECT_EQ(RTNAME(SumInteger4)(prod, __FILE__, __LINE__), 35);
+  prod.Destroy();
+}
+
+TEST(Reductions, DoubleMaxMin) {
+  std::vector<int> shape{3, 4, 2}; // rows, columns, planes
+  //   0  -3   6  -9     12 -15  18 -21
+  //  -1   4  -7  10    -13  16 -19  22
+  //   2  -5   8 -11     14 -17  20  22   <- note last two are equal to test
+  //   BACK=
+  auto array{MakeArray<TypeCategory::Real, 8>(shape,
+      std::vector<double>{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13,
+          14, -15, 16, -17, 18, -19, 20, -21, 22, 22})};
+  EXPECT_EQ(RTNAME(MaxvalReal8)(*array, __FILE__, __LINE__), 22.0);
+  EXPECT_EQ(RTNAME(MinvalReal8)(*array, __FILE__, __LINE__), -21.0);
+  StaticDescriptor<2> statDesc;
+  Descriptor &loc{statDesc.descriptor()};
+  RTNAME(Maxloc)
+  (loc, *array, /*KIND=*/8, __FILE__, __LINE__, /*MASK=*/nullptr,
+      /*BACK=*/false);
+  EXPECT_EQ(loc.rank(), 1);
+  EXPECT_EQ(loc.type().raw(), (TypeCode{TypeCategory::Integer, 8}.raw()));
+  EXPECT_EQ(loc.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(loc.GetDimension(0).Extent(), 3);
+  EXPECT_EQ(
+      *array->Element<double>(loc.ZeroBasedIndexedElement<SubscriptValue>(0)),
+      22.0);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int64_t>(0), 2);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int64_t>(1), 4);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int64_t>(2), 2);
+  loc.Destroy();
+  RTNAME(Maxloc)
+  (loc, *array, /*KIND=*/8, __FILE__, __LINE__, /*MASK=*/nullptr,
+      /*BACK=*/true);
+  EXPECT_EQ(loc.rank(), 1);
+  EXPECT_EQ(loc.type().raw(), (TypeCode{TypeCategory::Integer, 8}.raw()));
+  EXPECT_EQ(loc.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(loc.GetDimension(0).Extent(), 3);
+  EXPECT_EQ(
+      *array->Element<double>(loc.ZeroBasedIndexedElement<SubscriptValue>(0)),
+      22.0);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int64_t>(0), 3);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int64_t>(1), 4);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int64_t>(2), 2);
+  loc.Destroy();
+  RTNAME(MinlocDim)
+  (loc, *array, /*KIND=*/2, /*DIM=*/1, __FILE__, __LINE__, /*MASK=*/nullptr,
+      /*BACK=*/false);
+  EXPECT_EQ(loc.rank(), 2);
+  EXPECT_EQ(loc.type().raw(), (TypeCode{TypeCategory::Integer, 2}.raw()));
+  EXPECT_EQ(loc.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(loc.GetDimension(0).Extent(), 4);
+  EXPECT_EQ(loc.GetDimension(1).LowerBound(), 1);
+  EXPECT_EQ(loc.GetDimension(1).Extent(), 2);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(0), 2); // -1
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(1), 3); // -5
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(2), 2); // -2
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(3), 3); // -11
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(4), 2); // -13
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(5), 3); // -17
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(6), 2); // -19
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(7), 1); // -21
+  loc.Destroy();
+  auto mask{MakeArray<TypeCategory::Logical, 1>(shape,
+      std::vector<bool>{false, false, false, false, false, true, false, true,
+          false, false, true, true, true, false, false, true, false, true, true,
+          true, false, true, true, true})};
+  RTNAME(MaxlocDim)
+  (loc, *array, /*KIND=*/2, /*DIM=*/3, __FILE__, __LINE__, /*MASK=*/&*mask,
+      false);
+  EXPECT_EQ(loc.rank(), 2);
+  EXPECT_EQ(loc.type().raw(), (TypeCode{TypeCategory::Integer, 2}.raw()));
+  EXPECT_EQ(loc.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(loc.GetDimension(0).Extent(), 3);
+  EXPECT_EQ(loc.GetDimension(1).LowerBound(), 1);
+  EXPECT_EQ(loc.GetDimension(1).Extent(), 4);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(0), 2); // 12
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(1), 0);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(2), 0);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(3), 2); // -15
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(4), 0);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(5), 1); // -5
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(6), 2); // 18
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(7), 1); // -7
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(8), 0);
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(9), 2); // -21
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(10), 2); // 22
+  EXPECT_EQ(*loc.ZeroBasedIndexedElement<std::int16_t>(11), 2); // 22
+  loc.Destroy();
+}
+
+TEST(Reductions, Character) {
+  std::vector<int> shape{2, 3};
+  auto array{MakeArray<TypeCategory::Character, 1>(shape,
+      std::vector<std::string>{"abc", "def", "ghi", "jkl", "mno", "abc"}, 3)};
+  StaticDescriptor<1> statDesc;
+  Descriptor &res{statDesc.descriptor()};
+  RTNAME(MaxvalCharacter)(res, *array, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 0);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Character, 1}.raw()));
+  EXPECT_EQ(std::memcmp(res.OffsetElement<char>(), "mno", 3), 0);
+  res.Destroy();
+  RTNAME(MinvalCharacter)(res, *array, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 0);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Character, 1}.raw()));
+  EXPECT_EQ(std::memcmp(res.OffsetElement<char>(), "abc", 3), 0);
+  res.Destroy();
+  RTNAME(Maxloc)
+  (res, *array, /*KIND=*/4, __FILE__, __LINE__, /*MASK=*/nullptr,
+      /*BACK=*/false);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 3);
+  res.Destroy();
+  auto mask{MakeArray<TypeCategory::Logical, 1>(
+      shape, std::vector<bool>{false, true, false, true, false, true})};
+  RTNAME(Maxloc)
+  (res, *array, /*KIND=*/4, __FILE__, __LINE__, /*MASK=*/&*mask,
+      /*BACK=*/false);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 2);
+  res.Destroy();
+  RTNAME(Minloc)
+  (res, *array, /*KIND=*/4, __FILE__, __LINE__, /*MASK=*/nullptr,
+      /*BACK=*/false);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 1);
+  res.Destroy();
+  RTNAME(Minloc)
+  (res, *array, /*KIND=*/4, __FILE__, __LINE__, /*MASK=*/nullptr,
+      /*BACK=*/true);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 3);
+  res.Destroy();
+  RTNAME(Minloc)
+  (res, *array, /*KIND=*/4, __FILE__, __LINE__, /*MASK=*/&*mask,
+      /*BACK=*/true);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 3);
+  res.Destroy();
+}
+
+TEST(Reductions, Logical) {
+  std::vector<int> shape{2, 2};
+  auto array{MakeArray<TypeCategory::Logical, 4>(
+      shape, std::vector<std::int32_t>{false, false, true, true})};
+  ASSERT_EQ(array->ElementBytes(), std::size_t{4});
+  EXPECT_EQ(RTNAME(All)(*array, __FILE__, __LINE__), false);
+  EXPECT_EQ(RTNAME(Any)(*array, __FILE__, __LINE__), true);
+  EXPECT_EQ(RTNAME(Count)(*array, __FILE__, __LINE__), 2);
+  StaticDescriptor<2> statDesc;
+  Descriptor &res{statDesc.descriptor()};
+  RTNAME(AllDim)(res, *array, /*DIM=*/1, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Logical, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 0);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 1);
+  res.Destroy();
+  RTNAME(AllDim)(res, *array, /*DIM=*/2, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Logical, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 0);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 0);
+  res.Destroy();
+  RTNAME(AnyDim)(res, *array, /*DIM=*/1, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Logical, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 0);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 1);
+  res.Destroy();
+  RTNAME(AnyDim)(res, *array, /*DIM=*/2, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Logical, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 1);
+  res.Destroy();
+  RTNAME(CountDim)(res, *array, /*DIM=*/1, /*KIND=*/4, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 0);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 2);
+  res.Destroy();
+  RTNAME(CountDim)(res, *array, /*DIM=*/2, /*KIND=*/8, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 8}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int64_t>(0), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int64_t>(1), 1);
+  res.Destroy();
+}