diff --git a/flang/runtime/extrema.cpp b/flang/runtime/extrema.cpp --- a/flang/runtime/extrema.cpp +++ b/flang/runtime/extrema.cpp @@ -7,15 +7,20 @@ //===----------------------------------------------------------------------===// // Implements MAXLOC, MINLOC, MAXVAL, & MINVAL for all required operand types -// and shapes and (for MAXLOC & MINLOC) result integer kinds. +// and shapes and (for MAXLOC & MINLOC) result integer kinds. Also implements +// NORM2 using common infrastructure. #include "character.h" #include "reduction-templates.h" #include "reduction.h" #include "flang/Common/long-double.h" +#include #include +#include +#include namespace Fortran::runtime { + // MAXLOC & MINLOC template struct NumericCompare { @@ -332,11 +337,11 @@ NumericExtremumAccumulator{x}, intrinsic); } -template class ACCUMULATOR> -static void DoMaxOrMin(Descriptor &result, const Descriptor &x, int dim, +template +static void DoMaxMinNorm2(Descriptor &result, const Descriptor &x, int dim, const Descriptor *mask, const char *intrinsic, Terminator &terminator) { using Type = CppTypeFor; + ACCUMULATOR accumulator{x}; if (dim == 0 || x.rank() == 1) { // Total reduction result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr, @@ -345,14 +350,11 @@ terminator.Crash( "%s: could not allocate memory for result; STAT=%d", intrinsic, stat); } - ACCUMULATOR accumulator{x}; DoTotalReduction(x, dim, mask, accumulator, intrinsic, terminator); accumulator.GetResult(result.OffsetElement()); } else { // Partial reduction - using Accumulator = ACCUMULATOR; - Accumulator accumulator{x}; - PartialReduction( + PartialReduction( result, x, dim, mask, terminator, intrinsic, accumulator); } } @@ -362,7 +364,8 @@ void operator()(Descriptor &result, const Descriptor &x, int dim, const Descriptor *mask, const char *intrinsic, Terminator &terminator) const { - DoMaxOrMin( + DoMaxMinNorm2>( result, x, dim, mask, intrinsic, terminator); } }; @@ -392,8 +395,7 @@ } } -template -class CharacterExtremumAccumulator { +template class CharacterExtremumAccumulator { public: using Type = CppTypeFor; explicit CharacterExtremumAccumulator(const Descriptor &array) @@ -434,8 +436,8 @@ void operator()(Descriptor &result, const Descriptor &x, int dim, const Descriptor *mask, const char *intrinsic, Terminator &terminator) const { - DoMaxOrMin( + DoMaxMinNorm2>( result, x, dim, mask, intrinsic, terminator); } }; @@ -589,4 +591,91 @@ } } } // extern "C" + +// NORM2 + +template class Norm2Accumulator { +public: + using Type = CppTypeFor; + // Use at least double precision for accumulators + using AccumType = CppTypeFor; + explicit Norm2Accumulator(const Descriptor &array) : array_{array} {} + void Reinitialize() { max_ = sum_ = 0; } + template void GetResult(A *p, int /*zeroBasedDim*/ = -1) const { + // m * sqrt(1 + sum((others(:)/m)**2)) + *p = static_cast(max_ * std::sqrt(1 + sum_)); + } + bool Accumulate(Type x) { + auto absX{AccumType{std::abs(x)}}; + if (!max_) { + max_ = x; + } else if (absX > max_) { + auto t{max_ / absX}; // < 1.0 + auto tsq{t * t}; + sum_ *= tsq; // scale sum to reflect change to the max + sum_ += tsq; // include a term for the previous max + max_ = absX; + } else { // absX <= max_ + auto t{absX / max_}; + sum_ += t * t; + } + return true; + } + template bool AccumulateAt(const SubscriptValue at[]) { + return Accumulate(*array_.Element(at)); + } + +private: + const Descriptor &array_; + AccumType max_{0}; // value (m) with largest magnitude + AccumType sum_{0}; // sum((others(:)/m)**2) +}; + +template struct Norm2Helper { + void operator()(Descriptor &result, const Descriptor &x, int dim, + const Descriptor *mask, Terminator &terminator) const { + DoMaxMinNorm2>( + result, x, dim, mask, "NORM2", terminator); + } +}; + +extern "C" { +// TODO: REAL(2 & 3) +CppTypeFor RTNAME(Norm2_4)(const Descriptor &x, + const char *source, int line, int dim, const Descriptor *mask) { + return GetTotalReduction( + x, source, line, dim, mask, Norm2Accumulator<4>{x}, "NORM2"); +} +CppTypeFor RTNAME(Norm2_8)(const Descriptor &x, + const char *source, int line, int dim, const Descriptor *mask) { + return GetTotalReduction( + x, source, line, dim, mask, Norm2Accumulator<8>{x}, "NORM2"); +} +#if LONG_DOUBLE == 80 +CppTypeFor RTNAME(Norm2_10)(const Descriptor &x, + const char *source, int line, int dim, const Descriptor *mask) { + return GetTotalReduction( + x, source, line, dim, mask, Norm2Accumulator<10>{x}, "NORM2"); +} +#elif LONG_DOUBLE == 128 +CppTypeFor RTNAME(Norm2_16)(const Descriptor &x, + const char *source, int line, int dim, const Descriptor *mask) { + return GetTotalReduction( + x, source, line, dim, mask, Norm2Accumulator<16>{x}, "NORM2"); +} +#endif + +void RTNAME(Norm2Dim)(Descriptor &result, const Descriptor &x, int dim, + const char *source, int line, const Descriptor *mask) { + Terminator terminator{source, line}; + auto type{x.type().GetCategoryAndKind()}; + RUNTIME_CHECK(terminator, type); + if (type->first == TypeCategory::Real) { + ApplyFloatingPointKind( + type->second, terminator, result, x, dim, mask, terminator); + } else { + terminator.Crash("NORM2: bad type code %d", x.type().raw()); + } +} +} // extern "C" } // namespace Fortran::runtime diff --git a/flang/runtime/reduction.h b/flang/runtime/reduction.h --- a/flang/runtime/reduction.h +++ b/flang/runtime/reduction.h @@ -243,6 +243,22 @@ void RTNAME(MinvalDim)(Descriptor &, const Descriptor &, int dim, const char *source, int line, const Descriptor *mask = nullptr); +// NORM2 +float RTNAME(Norm2_2)(const Descriptor &, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr); +float RTNAME(Norm2_3)(const Descriptor &, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr); +float RTNAME(Norm2_4)(const Descriptor &, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr); +double RTNAME(Norm2_8)(const Descriptor &, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr); +long double RTNAME(Norm2_10)(const Descriptor &, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr); +long double RTNAME(Norm2_16)(const Descriptor &, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr); +void RTNAME(Norm2Dim)(Descriptor &, const Descriptor &, int dim, + const char *source, int line, const Descriptor *mask = nullptr); + // ALL, ANY, COUNT, & PARITY logical reductions bool RTNAME(All)(const Descriptor &, const char *source, int line, int dim = 0); void RTNAME(AllDim)(Descriptor &result, const Descriptor &, int dim, diff --git a/flang/runtime/reduction.cpp b/flang/runtime/reduction.cpp --- a/flang/runtime/reduction.cpp +++ b/flang/runtime/reduction.cpp @@ -10,7 +10,7 @@ // types and shapes. // // FINDLOC, SUM, and PRODUCT are in their own eponymous source files; -// MAXLOC, MINLOC, MAXVAL, and MINVAL are in extrema.cpp. +// NORM2, MAXLOC, MINLOC, MAXVAL, and MINVAL are in extrema.cpp. #include "reduction.h" #include "reduction-templates.h" diff --git a/flang/unittests/RuntimeGTest/Reduction.cpp b/flang/unittests/RuntimeGTest/Reduction.cpp --- a/flang/unittests/RuntimeGTest/Reduction.cpp +++ b/flang/unittests/RuntimeGTest/Reduction.cpp @@ -47,17 +47,25 @@ prod.Destroy(); } -TEST(Reductions, DoubleMaxMin) { +TEST(Reductions, DoubleMaxMinNorm2) { std::vector shape{3, 4, 2}; // rows, columns, planes // 0 -3 6 -9 12 -15 18 -21 // -1 4 -7 10 -13 16 -19 22 // 2 -5 8 -11 14 -17 20 22 <- note last two are equal to test // BACK= - auto array{MakeArray(shape, - std::vector{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, - 14, -15, 16, -17, 18, -19, 20, -21, 22, 22})}; + std::vector rawData{0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, + -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, 22}; + auto array{MakeArray(shape, rawData)}; EXPECT_EQ(RTNAME(MaxvalReal8)(*array, __FILE__, __LINE__), 22.0); EXPECT_EQ(RTNAME(MinvalReal8)(*array, __FILE__, __LINE__), -21.0); + double naiveNorm2{0}; + for (auto x : rawData) { + naiveNorm2 += x * x; + } + naiveNorm2 = std::sqrt(naiveNorm2); + double norm2Error{ + std::abs(naiveNorm2 - RTNAME(Norm2_8)(*array, __FILE__, __LINE__))}; + EXPECT_LE(norm2Error, 0.000001 * naiveNorm2); StaticDescriptor<2> statDesc; Descriptor &loc{statDesc.descriptor()}; RTNAME(Maxloc)