diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -16,37 +16,76 @@
 
 namespace Fortran::runtime::io {
 
-static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
-    int base, int totalBitSize) {
+template <int LOG2_BASE> // bits per digit: 1 for B, 3 for O, 4 for Z editing
+static bool EditBOZInput( // decodes one field into the bytes at n; false on error
+    IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
   std::optional<int> remaining;
   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
-  common::UnsignedInt128 value{0};
+  if (next && *next == '0') { // next is empty when the field has no characters
+    do {
+      next = io.NextInField(remaining, edit);
+    } while (next && *next == '0');
+  }
+  // First pass: validate, and count the digits after the zeroes skipped above
+  int digits{0};
   for (; next; next = io.NextInField(remaining, edit)) {
     char32_t ch{*next};
     if (ch == ' ' || ch == '\t') {
       continue;
     }
-    int digit{0};
     if (ch >= '0' && ch <= '1') {
-      digit = ch - '0';
-    } else if (base >= 8 && ch >= '2' && ch <= '7') {
-      digit = ch - '0';
-    } else if (base >= 10 && ch >= '8' && ch <= '9') {
-      digit = ch - '0';
-    } else if (base == 16 && ch >= 'A' && ch <= 'Z') {
-      digit = ch + 10 - 'A';
-    } else if (base == 16 && ch >= 'a' && ch <= 'z') {
-      digit = ch + 10 - 'a';
+    } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') { // octal or hex only
+    } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') { // hex only
+    } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') { // hex only
+    } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') { // hex only
     } else {
       io.GetIoErrorHandler().SignalError(
           "Bad character '%lc' in B/O/Z input field", ch);
       return false;
     }
-    value *= base;
-    value += digit;
+    ++digits;
+  }
+  auto significantBytes{static_cast<std::size_t>(digits * LOG2_BASE + 7) / 8};
+  if (significantBytes > bytes) {
+    io.GetIoErrorHandler().SignalError(
+        "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
+    return false;
+  }
+  // Reset to start of significant digits
+  io.HandleRelativePosition(-digits); // NOTE(review): skipped blanks not counted
+  remaining.reset();
+  // Second pass: store the digits in the low-order significantBytes of *n
+  std::memset(n, 0, bytes);
+  int increment{isHostLittleEndian ? -1 : 1}; // toward less significant bytes
+  auto *data{reinterpret_cast<unsigned char *>(n) +
+      (isHostLittleEndian ? significantBytes - 1 : bytes - significantBytes)};
+  int shift{((digits - 1) * LOG2_BASE) & 7}; // low bit of first digit in its byte
+  if (shift + LOG2_BASE > 8) {
+    shift -= 8; // misaligned octal
+  }
+  while (digits > 0) {
+    char32_t ch{*io.NextInField(remaining, edit)}; // NOTE(review): unchecked
+    int digit{0};
+    if (ch >= '0' && ch <= '9') {
+      digit = ch - '0';
+    } else if (ch >= 'A' && ch <= 'F') {
+      digit = ch + 10 - 'A';
+    } else if (ch >= 'a' && ch <= 'f') {
+      digit = ch + 10 - 'a';
+    } else {
+      continue; // not a digit: skipped without consuming the digit count
+    }
+    --digits;
+    if (shift < 0) {
+      shift += 8;
+      if (shift + LOG2_BASE > 8) { // misaligned octal
+        *data |= digit >> (8 - shift); // high bits stay in the current byte
+      }
+      data += increment;
+    }
+    *data |= digit << shift;
+    shift -= LOG2_BASE;
   }
-  // TODO: check for overflow
-  std::memcpy(n, &value, totalBitSize >> 3);
   return true;
 }
 
@@ -83,11 +122,11 @@
   case 'I':
     break;
   case 'B':
-    return EditBOZInput(io, edit, n, 2, kind << 3);
+    return EditBOZInput<1>(io, edit, n, kind);
   case 'O':
-    return EditBOZInput(io, edit, n, 8, kind << 3);
+    return EditBOZInput<3>(io, edit, n, kind);
   case 'Z':
-    return EditBOZInput(io, edit, n, 16, kind << 3);
+    return EditBOZInput<4>(io, edit, n, kind);
   case 'A': // legacy extension
     return EditCharacterInput(io, edit, reinterpret_cast(n), kind);
   default:
@@ -457,7 +496,6 @@
 
 template
 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
-  constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
   switch (edit.descriptor) {
   case DataEdit::ListDirected:
     if (IsNamelistName(io)) {
@@ -472,14 +510,14 @@
   case 'G':
     return EditCommonRealInput(io, edit, n);
   case 'B':
-    return EditBOZInput(
-        io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision));
+    return EditBOZInput<1>(io, edit, n,
+        common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
   case 'O':
-    return EditBOZInput(
-        io, edit, n, 8, common::BitsForBinaryPrecision(binaryPrecision));
+    return EditBOZInput<3>(io, edit,
n, + common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); case 'Z': - return EditBOZInput( - io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision)); + return EditBOZInput<4>(io, edit, n, + common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); case 'A': // legacy extension return EditCharacterInput(io, edit, reinterpret_cast(n), KIND); default: @@ -590,7 +628,7 @@ // or the end of the current record. Subtlety: the "remaining" count // here is a dummy that's used to avoid the interpretation of separators // in NextInField. - std::optional remaining{maxUTF8Bytes}; + std::optional remaining{length > 0 ? maxUTF8Bytes : 0}; while (std::optional next{io.NextInField(remaining, edit)}) { switch (*next) { case ' ': @@ -602,8 +640,7 @@ break; default: *x++ = *next; - --length; - remaining = maxUTF8Bytes; + remaining = --length > 0 ? maxUTF8Bytes : 0; } } std::fill_n(x, length, ' '); @@ -619,6 +656,12 @@ case 'A': case 'G': break; + case 'B': + return EditBOZInput<1>(io, edit, x, length * sizeof *x); + case 'O': + return EditBOZInput<3>(io, edit, x, length * sizeof *x); + case 'Z': + return EditBOZInput<4>(io, edit, x, length * sizeof *x); default: io.GetIoErrorHandler().SignalError(IostatErrorInFormat, "Data edit descriptor '%c' may not be used with a CHARACTER data item", diff --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp --- a/flang/runtime/edit-output.cpp +++ b/flang/runtime/edit-output.cpp @@ -13,6 +13,85 @@ namespace Fortran::runtime::io { +// B/O/Z output of arbitrarily sized data emits a binary/octal/hexadecimal +// representation of what is interpreted to be a single unsigned integer value. +// When used with character data, endianness is exposed. 
+template <int LOG2_BASE> // bits per digit: 1 for B, 3 for O, 4 for Z editing
+static bool EditBOZOutput(IoStatementState &io, const DataEdit &edit,
+    const unsigned char *data0, std::size_t bytes) {
+  // Round up: a short leading digit (octal) still occupies an output position.
+  int digits{static_cast<int>((bytes * 8 + LOG2_BASE - 1) / LOG2_BASE)};
+  int get{static_cast<int>(bytes * 8) - (digits - 1) * LOG2_BASE}; // 1st digit
+  int shift{7}; // next bit to read from *data, high to low
+  int increment{isHostLittleEndian ? -1 : 1};
+  const unsigned char *data{data0 + (isHostLittleEndian ? bytes - 1 : 0)};
+  int skippedZeroes{0};
+  int digit{0};
+  // The same algorithm is used to generate digits for real (below)
+  // as well as for generating them only to skip leading zeroes (here).
+  // Bits are copied one at a time from the source data.
+  // TODO: Multiple bit copies for hexadecimal, where misalignment
+  // is not possible; or for octal when all 3 bits come from the
+  // same byte.
+  while (bytes > 0) {
+    if (get == 0) { // a whole digit has been gathered
+      if (digit != 0) {
+        break; // first nonzero leading digit
+      }
+      ++skippedZeroes;
+      get = LOG2_BASE;
+    } else if (shift < 0) { // current byte exhausted
+      data += increment;
+      --bytes;
+      shift = 7;
+    } else {
+      digit = 2 * digit + ((*data >> shift--) & 1);
+      --get;
+    }
+  }
+  // Emit leading spaces and zeroes; detect field overflow
+  int leadingZeroes{0};
+  int editWidth{edit.width.value_or(0)};
+  int significant{digits - skippedZeroes};
+  if (edit.digits && significant <= *edit.digits) { // Bw.m, Ow.m, Zw.m
+    if (*edit.digits == 0 && bytes == 0) { // zero value with m == 0: blanks only
+      editWidth = std::max(1, editWidth);
+    } else {
+      leadingZeroes = *edit.digits - significant;
+    }
+  } else if (bytes == 0) { // no nonzero digit was found: emit a single '0'
+    leadingZeroes = 1;
+  }
+  int subTotal{leadingZeroes + significant};
+  int leadingSpaces{std::max(0, editWidth - subTotal)};
+  if (editWidth > 0 && leadingSpaces + subTotal > editWidth) {
+    return io.EmitRepeated('*', editWidth);
+  }
+  if (!(io.EmitRepeated(' ', leadingSpaces) &&
+      io.EmitRepeated('0', leadingZeroes))) {
+    return false;
+  }
+  // Emit remaining digits
+  while (bytes > 0) {
+    if (get == 0) {
+      char ch{static_cast<char>(digit >= 10 ? 'A' + digit - 10 : '0' + digit)};
+      if (!io.Emit(&ch, 1)) {
+        return false;
+      }
+      get = LOG2_BASE;
+      digit = 0;
+    } else if (shift < 0) {
+      data += increment;
+      --bytes;
+      shift = 7;
+    } else {
+      digit = 2 * digit + ((*data >> shift--) & 1);
+      --get;
+    }
+  }
+  return true;
+}
+
 template
 bool EditIntegerOutput(IoStatementState &io, const DataEdit &edit,
     common::HostSignedIntType<8 * KIND> n) {
@@ -38,21 +117,14 @@
     }
     break;
   case 'B':
-    for (; un > 0; un >>= 1) {
-      *--p = '0' + (static_cast(un) & 1);
-    }
-    break;
+    return EditBOZOutput<1>(
+        io, edit, reinterpret_cast(&n), KIND);
   case 'O':
-    for (; un > 0; un >>= 3) {
-      *--p = '0' + (static_cast(un) & 7);
-    }
-    break;
+    return EditBOZOutput<3>(
+        io, edit, reinterpret_cast(&n), KIND);
   case 'Z':
-    for (; un > 0; un >>= 4) {
-      int digit = static_cast(un) & 0xf;
-      *--p = digit >= 10 ? 'A' + (digit - 10) : '0' + digit;
-    }
-    break;
+    return EditBOZOutput<4>(
+        io, edit, reinterpret_cast(&n), KIND);
   case 'A': // legacy extension
     return EditCharacterOutput(
         io, edit, reinterpret_cast(&n), sizeof n);
@@ -442,11 +514,17 @@
   case 'F':
     return EditFOutput(edit);
   case 'B':
+    return EditBOZOutput<1>(io_, edit,
+        reinterpret_cast(&x_),
+        common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
   case 'O':
+    return EditBOZOutput<3>(io_, edit,
+        reinterpret_cast(&x_),
+        common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
   case 'Z':
-    return EditIntegerOutput(io_, edit,
-        static_cast>(
-            decimal::BinaryFloatingPointNumber{x_}.raw()));
+    return EditBOZOutput<4>(io_, edit,
+        reinterpret_cast(&x_),
+        common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
   case 'G':
     return Edit(EditForGOutput(edit));
   case 'A': // legacy extension
@@ -475,6 +553,15 @@
   case 'G':
     return io.EmitRepeated(' ', std::max(0, edit.width.value_or(1) - 1)) &&
         io.Emit(truth ?
"T" : "F", 1); + case 'B': + return EditBOZOutput<1>(io, edit, + reinterpret_cast(&truth), sizeof truth); + case 'O': + return EditBOZOutput<3>(io, edit, + reinterpret_cast(&truth), sizeof truth); + case 'Z': + return EditBOZOutput<4>(io, edit, + reinterpret_cast(&truth), sizeof truth); default: io.GetIoErrorHandler().SignalError(IostatErrorInFormat, "Data edit descriptor '%c' may not be used with a LOGICAL data item", @@ -544,6 +631,15 @@ case 'A': case 'G': break; + case 'B': + return EditBOZOutput<1>(io, edit, + reinterpret_cast(x), sizeof(CHAR) * length); + case 'O': + return EditBOZOutput<3>(io, edit, + reinterpret_cast(x), sizeof(CHAR) * length); + case 'Z': + return EditBOZOutput<4>(io, edit, + reinterpret_cast(x), sizeof(CHAR) * length); default: io.GetIoErrorHandler().SignalError(IostatErrorInFormat, "Data edit descriptor '%c' may not be used with a CHARACTER data item",