Index: flang/lib/Semantics/check-io.cpp =================================================================== --- flang/lib/Semantics/check-io.cpp +++ flang/lib/Semantics/check-io.cpp @@ -556,10 +556,6 @@ if (HasVectorSubscript(*expr)) { context_.Say(parser::FindSourceLocation(*var), // C1201 "Internal file must not have a vector subscript"_err_en_US); - } else if (!ExprTypeKindIsDefault(*expr, context_)) { - // This may be too restrictive; other kinds may be valid. - context_.Say(parser::FindSourceLocation(*var), // C1202 - "Invalid character kind for an internal file variable"_err_en_US); } } SetSpecifier(IoSpecKind::Unit); Index: flang/runtime/connection.h =================================================================== --- flang/runtime/connection.h +++ flang/runtime/connection.h @@ -28,6 +28,7 @@ Access access{Access::Sequential}; // ACCESS='SEQUENTIAL', 'DIRECT', 'STREAM' std::optional isUnformatted; // FORM='UNFORMATTED' if true bool isUTF8{false}; // ENCODING='UTF-8' + unsigned char internalIoCharKind{0}; // 0->external, 1/2/4->internal std::optional openRecl; // RECL= on OPEN bool IsRecordFile() const { @@ -39,13 +40,18 @@ // For wide CHARACTER kinds, always use UTF-8 for formatted I/O. // For single-byte CHARACTER, encode characters >= 0x80 with // UTF-8 iff the mode is set. - return sizeof(CHAR) > 1 || isUTF8; + return internalIoCharKind == 0 && (sizeof(CHAR) > 1 || isUTF8); } }; struct ConnectionState : public ConnectionAttributes { bool IsAtEOF() const; // true when read has hit EOF or endfile record bool IsAfterEndfile() const; // true after ENDFILE until repositioned + + // All positions and measurements are always in units of bytes, + // not characters. Multi-byte character encodings are possible in + // both internal I/O (when the character kind of the variable is 2 or 4) + // and external formatted I/O (when the encoding is UTF-8). 
std::size_t RemainingSpaceInRecord() const; bool NeedAdvance(std::size_t) const; void HandleAbsolutePosition(std::int64_t); @@ -68,13 +74,13 @@ std::int64_t currentRecordNumber{1}; // 1 is first - // positionInRecord is the 0-based offset in the current recurd to/from - // which the next data transfer will occur. It can be past + // positionInRecord is the 0-based byte offset in the current record + // to/from which the next data transfer will occur. It can be past // furthestPositionInRecord if moved by an X or T or TR control edit // descriptor. std::int64_t positionInRecord{0}; - // furthestPositionInRecord is the 0-based offset of the greatest + // furthestPositionInRecord is the 0-based byte offset of the greatest // position in the current record to/from which any data transfer has // occurred, plus one. It can be viewed as a count of bytes processed. std::int64_t furthestPositionInRecord{0}; // max(position+bytes) Index: flang/runtime/edit-input.cpp =================================================================== --- flang/runtime/edit-input.cpp +++ flang/runtime/edit-input.cpp @@ -19,18 +19,19 @@ template static bool EditBOZInput( IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) { - std::optional remaining; - std::optional next{io.PrepareInput(edit, remaining)}; + // Skip leading white space & zeroes + std::optional remaining{io.CueUpInput(edit)}; + auto start{io.GetConnectionState().positionInRecord}; + std::optional next{io.NextInField(remaining, edit)}; if (next.value_or('?') == '0') { do { + start = io.GetConnectionState().positionInRecord; next = io.NextInField(remaining, edit); } while (next && *next == '0'); } // Count significant digits after any leading white space & zeroes int digits{0}; - int chars{0}; for (; next; next = io.NextInField(remaining, edit)) { - ++chars; char32_t ch{*next}; if (ch == ' ' || ch == '\t') { continue; @@ -54,7 +55,7 @@ return false; } // Reset to start of significant digits - 
io.HandleRelativePosition(-chars); + io.HandleAbsolutePosition(start); remaining.reset(); // Make a second pass now that the digit count is known std::memset(n, 0, bytes); @@ -99,7 +100,8 @@ // Returns true if there's a '-' sign. static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit, std::optional &next, std::optional &remaining) { - next = io.PrepareInput(edit, remaining); + remaining = io.CueUpInput(edit); + next = io.NextInField(remaining, edit); bool negative{false}; if (next) { negative = *next == '-'; @@ -384,10 +386,13 @@ if (edit.modes.scale != 0) { return false; } + const ConnectionState &connection{io.GetConnectionState()}; + if (connection.internalIoCharKind > 1) { + return false; // reading non-default character + } const char *str{nullptr}; std::size_t got{io.GetNextInputBytes(str)}; - if (got == 0 || str == nullptr || - !io.GetConnectionState().recordLength.has_value()) { + if (got == 0 || str == nullptr || !connection.recordLength.has_value()) { return false; // could not access reliably-terminated input stream } const char *p{str}; @@ -569,8 +574,8 @@ edit.descriptor); return false; } - std::optional remaining; - std::optional next{io.PrepareInput(edit, remaining)}; + std::optional remaining{io.CueUpInput(edit)}; + std::optional next{io.NextInField(remaining, edit)}; if (next && *next == '.') { // skip optional period next = io.NextInField(remaining, edit); } @@ -740,6 +745,18 @@ chunk = 1; } --remaining; + } else if (connection.internalIoCharKind > 1) { + // Reading from non-default character internal unit + chunk = connection.internalIoCharKind; + if (skipping) { + --skip; + } else { + char32_t buffer{0}; + std::memcpy(&buffer, input, chunk); + *x++ = buffer; + --length; + } + --remaining; } else if constexpr (sizeof *x > 1) { // Read single byte with expansion into multi-byte CHARACTER chunk = 1; Index: flang/runtime/edit-output.cpp =================================================================== --- 
flang/runtime/edit-output.cpp +++ flang/runtime/edit-output.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "edit-output.h" +#include "emit-encoded.h" #include "utf.h" #include "flang/Common/uint128.h" #include @@ -69,17 +70,17 @@ int subTotal{leadingZeroes + significant}; int leadingSpaces{std::max(0, editWidth - subTotal)}; if (editWidth > 0 && leadingSpaces + subTotal > editWidth) { - return io.EmitRepeated('*', editWidth); + return EmitRepeated(io, '*', editWidth); } - if (!(io.EmitRepeated(' ', leadingSpaces) && - io.EmitRepeated('0', leadingZeroes))) { + if (!(EmitRepeated(io, ' ', leadingSpaces) && + EmitRepeated(io, '0', leadingZeroes))) { return false; } // Emit remaining digits while (bytes > 0) { if (get == 0) { char ch{static_cast(digit >= 10 ? 'A' + digit - 10 : '0' + digit)}; - if (!io.Emit(&ch, 1)) { + if (!EmitAscii(io, &ch, 1)) { return false; } get = LOG2_BASE; @@ -157,7 +158,7 @@ int subTotal{signChars + leadingZeroes + digits}; int leadingSpaces{std::max(0, editWidth - subTotal)}; if (editWidth > 0 && leadingSpaces + subTotal > editWidth) { - return io.EmitRepeated('*', editWidth); + return EmitRepeated(io, '*', editWidth); } if (edit.IsListDirected()) { int total{std::max(leadingSpaces, 1) + subTotal}; @@ -167,9 +168,9 @@ } leadingSpaces = 1; } - return io.EmitRepeated(' ', leadingSpaces) && - io.Emit(n < 0 ? "-" : "+", signChars) && - io.EmitRepeated('0', leadingZeroes) && io.Emit(p, digits); + return EmitRepeated(io, ' ', leadingSpaces) && + EmitAscii(io, n < 0 ? 
"-" : "+", signChars) && + EmitRepeated(io, '0', leadingZeroes) && EmitAscii(io, p, digits); } // Formats the exponent (see table 13.1 for all the cases) @@ -218,9 +219,9 @@ length += prefixLength + suffixLength; ConnectionState &connection{io_.GetConnectionState()}; return (!connection.NeedAdvance(length) || io_.AdvanceRecord()) && - io_.Emit(" (", prefixLength); + EmitAscii(io_, " (", prefixLength); } else if (width > length) { - return io_.EmitRepeated(' ', width - length); + return EmitRepeated(io_, ' ', width - length); } else { return true; } @@ -228,9 +229,10 @@ bool RealOutputEditingBase::EmitSuffix(const DataEdit &edit) { if (edit.descriptor == DataEdit::ListDirectedRealPart) { - return io_.Emit(edit.modes.editingFlags & decimalComma ? ";" : ",", 1); + return EmitAscii( + io_, edit.modes.editingFlags & decimalComma ? ";" : ",", 1); } else if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { - return io_.Emit(")", 1); + return EmitAscii(io_, ")", 1); } else { return true; } @@ -307,7 +309,7 @@ Convert(significantDigits, edit.modes.round, flags)}; if (IsInfOrNaN(converted)) { return EmitPrefix(edit, converted.length, editWidth) && - io_.Emit(converted.str, converted.length) && EmitSuffix(edit); + EmitAscii(io_, converted.str, converted.length) && EmitSuffix(edit); } if (!IsZero()) { converted.decimalExponent -= scale; @@ -354,7 +356,7 @@ expoLength}; int width{editWidth > 0 ? editWidth : totalLength}; if (totalLength > width || !exponent) { - return io_.EmitRepeated('*', width); + return EmitRepeated(io_, '*', width); } if (totalLength < width && digitsBeforePoint == 0 && zeroesBeforePoint == 0) { @@ -365,14 +367,14 @@ width = totalLength; } return EmitPrefix(edit, totalLength, width) && - io_.Emit(converted.str, signLength + digitsBeforePoint) && - io_.EmitRepeated('0', zeroesBeforePoint) && - io_.Emit(edit.modes.editingFlags & decimalComma ? 
"," : ".", 1) && - io_.EmitRepeated('0', zeroesAfterPoint) && - io_.Emit( - converted.str + signLength + digitsBeforePoint, digitsAfterPoint) && - io_.EmitRepeated('0', trailingZeroes) && - io_.Emit(exponent, expoLength) && EmitSuffix(edit); + EmitAscii(io_, converted.str, signLength + digitsBeforePoint) && + EmitRepeated(io_, '0', zeroesBeforePoint) && + EmitAscii(io_, edit.modes.editingFlags & decimalComma ? "," : ".", 1) && + EmitRepeated(io_, '0', zeroesAfterPoint) && + EmitAscii(io_, converted.str + signLength + digitsBeforePoint, + digitsAfterPoint) && + EmitRepeated(io_, '0', trailingZeroes) && + EmitAscii(io_, exponent, expoLength) && EmitSuffix(edit); } } @@ -401,7 +403,7 @@ Convert(extraDigits + fracDigits, rounding, flags)}; if (IsInfOrNaN(converted)) { return EmitPrefix(edit, converted.length, editWidth) && - io_.Emit(converted.str, converted.length) && EmitSuffix(edit); + EmitAscii(io_, converted.str, converted.length) && EmitSuffix(edit); } int expo{converted.decimalExponent + edit.modes.scale /*kP*/}; int signLength{*converted.str == '-' || *converted.str == '+' ? 1 : 0}; @@ -463,22 +465,22 @@ trailingZeroes}; int width{editWidth > 0 ? editWidth : totalLength}; if (totalLength > width) { - return io_.EmitRepeated('*', width); + return EmitRepeated(io_, '*', width); } if (totalLength < width && digitsBeforePoint + zeroesBeforePoint == 0) { zeroesBeforePoint = 1; ++totalLength; } return EmitPrefix(edit, totalLength, width) && - io_.Emit(converted.str, signLength + digitsBeforePoint) && - io_.EmitRepeated('0', zeroesBeforePoint) && - io_.Emit(edit.modes.editingFlags & decimalComma ? 
"," : ".", 1) && - io_.EmitRepeated('0', zeroesAfterPoint) && - io_.Emit( - converted.str + signLength + digitsBeforePoint, digitsAfterPoint) && - io_.EmitRepeated('1', trailingOnes) && - io_.EmitRepeated('0', trailingZeroes) && - io_.EmitRepeated(' ', trailingBlanks_) && EmitSuffix(edit); + EmitAscii(io_, converted.str, signLength + digitsBeforePoint) && + EmitRepeated(io_, '0', zeroesBeforePoint) && + EmitAscii(io_, edit.modes.editingFlags & decimalComma ? "," : ".", 1) && + EmitRepeated(io_, '0', zeroesAfterPoint) && + EmitAscii(io_, converted.str + signLength + digitsBeforePoint, + digitsAfterPoint) && + EmitRepeated(io_, '1', trailingOnes) && + EmitRepeated(io_, '0', trailingZeroes) && + EmitRepeated(io_, ' ', trailingBlanks_) && EmitSuffix(edit); } } @@ -594,15 +596,16 @@ bool ListDirectedLogicalOutput(IoStatementState &io, ListDirectedStatementState &list, bool truth) { - return list.EmitLeadingSpaceOrAdvance(io) && io.Emit(truth ? "T" : "F", 1); + return list.EmitLeadingSpaceOrAdvance(io) && + EmitAscii(io, truth ? "T" : "F", 1); } bool EditLogicalOutput(IoStatementState &io, const DataEdit &edit, bool truth) { switch (edit.descriptor) { case 'L': case 'G': - return io.EmitRepeated(' ', std::max(0, edit.width.value_or(1) - 1)) && - io.Emit(truth ? "T" : "F", 1); + return EmitRepeated(io, ' ', std::max(0, edit.width.value_or(1) - 1)) && + EmitAscii(io, truth ? "T" : "F", 1); case 'B': return EditBOZOutput<1>(io, edit, reinterpret_cast(&truth), sizeof truth); @@ -635,7 +638,7 @@ if (connection.NeedAdvance(1)) { ok = ok && io.AdvanceRecord(); } - ok = ok && io.EmitEncoded(&ch, 1); + ok = ok && EmitEncoded(io, &ch, 1); }}; EmitOne(modes.delim); for (std::size_t j{0}; j < length; ++j) { @@ -658,15 +661,18 @@ // Undelimited list-directed output ok = ok && list.EmitLeadingSpaceOrAdvance(io, length > 0 ? 1 : 0, true); std::size_t put{0}; - std::size_t oneIfUTF8{connection.useUTF8() ? 
1 : length}; + std::size_t oneAtATime{ + connection.useUTF8() || connection.internalIoCharKind > 1 + ? 1 + : length}; while (ok && put < length) { if (std::size_t chunk{std::min( - std::min(length - put, oneIfUTF8), + std::min(length - put, oneAtATime), connection.RemainingSpaceInRecord())}) { - ok = io.EmitEncoded(x + put, chunk); + ok = EmitEncoded(io, x + put, chunk); put += chunk; } else { - ok = io.AdvanceRecord() && io.Emit(" ", 1); + ok = io.AdvanceRecord() && EmitAscii(io, " ", 1); } } list.set_lastWasUndelimitedCharacter(true); @@ -702,8 +708,8 @@ edit.descriptor); return false; } - return io.EmitRepeated(' ', std::max(0, width - len)) && - io.EmitEncoded(x, std::min(width, len)); + return EmitRepeated(io, ' ', std::max(0, width - len)) && + EmitEncoded(io, x, std::min(width, len)); } template bool EditIntegerOutput<1>( Index: flang/runtime/emit-encoded.h =================================================================== --- /dev/null +++ flang/runtime/emit-encoded.h @@ -0,0 +1,94 @@ +//===-- runtime/emit-encoded.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Templates for emitting CHARACTER values with conversion + +#ifndef FORTRAN_RUNTIME_EMIT_ENCODED_H_ +#define FORTRAN_RUNTIME_EMIT_ENCODED_H_ + +#include "connection.h" +#include "environment.h" +#include "utf.h" + +namespace Fortran::runtime::io { + +template +bool EmitEncoded(CONTEXT &to, const CHAR *data, std::size_t chars) { + ConnectionState &connection{to.GetConnectionState()}; + if (connection.useUTF8()) { + using UnsignedChar = std::make_unsigned_t; + const UnsignedChar *uData{reinterpret_cast(data)}; + char buffer[256]; + std::size_t at{0}; + while (chars-- > 0) { + auto len{EncodeUTF8(buffer + at, *uData++)}; + at += len; + if (at + maxUTF8Bytes > sizeof buffer) { + if (!to.Emit(buffer, at)) { + return false; + } + at = 0; + } + } + return at == 0 || to.Emit(buffer, at); + } else { + std::size_t internalKind = connection.internalIoCharKind; + if (internalKind == 0 || internalKind == sizeof(CHAR)) { + const char *rawData{reinterpret_cast(data)}; + return to.Emit(rawData, chars * sizeof(CHAR), sizeof(CHAR)); + } else { + // CHARACTER kind conversion for internal output + while (chars-- > 0) { + char32_t buffer = *data++; + char *p{reinterpret_cast(&buffer)}; + if constexpr (!isHostLittleEndian) { + p += sizeof(buffer) - internalKind; + } + if (!to.Emit(p, internalKind)) { + return false; + } + } + return true; + } + } +} + +template +bool EmitAscii(CONTEXT &to, const char *data, std::size_t chars) { + ConnectionState &connection{to.GetConnectionState()}; + if (connection.internalIoCharKind <= 1) { + return to.Emit(data, chars); + } else { + return EmitEncoded(to, data, chars); + } +} + +template +bool EmitRepeated(CONTEXT &to, char ch, std::size_t n) { + if (n <= 0) { + return true; + } + ConnectionState &connection{to.GetConnectionState()}; + if (connection.internalIoCharKind <= 1) { + while (n-- > 0) { + if 
(!to.Emit(&ch, 1)) { + return false; + } + } + } else { + while (n-- > 0) { + if (!EmitEncoded(to, &ch, 1)) { + return false; + } + } + } + return true; +} + +} // namespace Fortran::runtime::io +#endif // FORTRAN_RUNTIME_EMIT_ENCODED_H_ Index: flang/runtime/format-implementation.h =================================================================== --- flang/runtime/format-implementation.h +++ flang/runtime/format-implementation.h @@ -11,6 +11,7 @@ #ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_ #define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_ +#include "emit-encoded.h" #include "format.h" #include "io-stmt.h" #include "flang/Common/format.h" @@ -130,6 +131,10 @@ break; case 'X': if (!next) { + ConnectionState &connection{context.GetConnectionState()}; + if (connection.internalIoCharKind > 1) { + n *= connection.internalIoCharKind; + } context.HandleRelativePosition(n); return; } @@ -146,7 +151,14 @@ break; case 'T': { if (!next) { // Tn - context.HandleAbsolutePosition(n - 1); // convert 1-based to 0-based + --n; // convert 1-based to 0-based + } + ConnectionState &connection{context.GetConnectionState()}; + if (connection.internalIoCharKind > 1) { + n *= connection.internalIoCharKind; + } + if (!next) { // Tn + context.HandleAbsolutePosition(n); return; } if (next == 'L' || next == 'R') { // TLn & TRn @@ -300,7 +312,7 @@ } else { --chars; } - context.Emit(format_ + start, chars); + EmitAscii(context, format_ + start, chars); } else if (ch == 'H') { // 9HHOLLERITH if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) { @@ -308,7 +320,7 @@ maybeReversionPoint); return 0; } - context.Emit(format_ + offset_, static_cast(*repeat)); + EmitAscii(context, format_ + offset_, static_cast(*repeat)); offset_ += *repeat; } else if (ch >= 'A' && ch <= 'Z') { int start{offset_ - 1}; @@ -350,7 +362,7 @@ } else if (ch == '\t' || ch == '\v') { // Tabs (extension) // TODO: any other raw characters? 
- context.Emit(format_ + offset_ - 1, 1); + EmitAscii(context, format_ + offset_ - 1, 1); } else { ReportBadFormat( context, "Invalid character in FORMAT", maybeReversionPoint); Index: flang/runtime/format.h =================================================================== --- flang/runtime/format.h +++ flang/runtime/format.h @@ -20,6 +20,8 @@ namespace Fortran::runtime::io { +class IoStatementState; + enum EditingFlags { blankZero = 1, // BLANK=ZERO or BZ edit decimalComma = 2, // DECIMAL=COMMA or DC edit @@ -80,7 +82,7 @@ template class FormatControl { public: using Context = CONTEXT; - using CharType = typename Context::CharType; + using CharType = char; // formats are always default kind CHARACTER FormatControl() {} FormatControl(const Terminator &, const CharType *format, Index: flang/runtime/internal-unit.h =================================================================== --- flang/runtime/internal-unit.h +++ flang/runtime/internal-unit.h @@ -26,7 +26,7 @@ public: using Scalar = std::conditional_t; - InternalDescriptorUnit(Scalar, std::size_t); + InternalDescriptorUnit(Scalar, std::size_t chars, int kind); InternalDescriptorUnit(const Descriptor &, const Terminator &); void EndIoStatement(); @@ -44,6 +44,7 @@ return descriptor().template ZeroBasedIndexedElement( currentRecordNumber - 1); } + void BlankFill(char *, std::size_t); void BlankFillOutputRecord(); StaticDescriptor staticDescriptor_; Index: flang/runtime/internal-unit.cpp =================================================================== --- flang/runtime/internal-unit.cpp +++ flang/runtime/internal-unit.cpp @@ -16,23 +16,27 @@ template InternalDescriptorUnit::InternalDescriptorUnit( - Scalar scalar, std::size_t length) { + Scalar scalar, std::size_t length, int kind) { + internalIoCharKind = kind; recordLength = length; endfileRecordNumber = 2; void *pointer{reinterpret_cast(const_cast(scalar))}; - descriptor().Establish(TypeCode{CFI_type_char}, length, pointer, 0, nullptr, - 
CFI_attribute_pointer); + descriptor().Establish(TypeCode{TypeCategory::Character, kind}, length * kind, + pointer, 0, nullptr, CFI_attribute_pointer); } template InternalDescriptorUnit::InternalDescriptorUnit( const Descriptor &that, const Terminator &terminator) { - RUNTIME_CHECK(terminator, that.type().IsCharacter()); + auto thatType{that.type().GetCategoryAndKind()}; + RUNTIME_CHECK(terminator, thatType.has_value()); + RUNTIME_CHECK(terminator, thatType->first == TypeCategory::Character); Descriptor &d{descriptor()}; RUNTIME_CHECK( terminator, that.SizeInBytes() <= d.SizeInBytes(maxRank, true, 0)); new (&d) Descriptor{that}; d.Check(); + internalIoCharKind = thatType->second; recordLength = d.ElementBytes(); endfileRecordNumber = d.Elements() + 1; } @@ -73,8 +77,8 @@ bytes = std::max(std::int64_t{0}, furthestAfter - positionInRecord); ok = false; } else if (positionInRecord > furthestPositionInRecord) { - std::fill_n(record + furthestPositionInRecord, - positionInRecord - furthestPositionInRecord, ' '); + BlankFill(record + furthestPositionInRecord, + positionInRecord - furthestPositionInRecord); } std::memcpy(record + positionInRecord, data, bytes); positionInRecord += bytes; @@ -118,14 +122,30 @@ return true; } +template +void InternalDescriptorUnit::BlankFill(char *at, std::size_t bytes) { + switch (internalIoCharKind) { + case 2: + std::fill_n(reinterpret_cast(at), bytes / 2, + static_cast(' ')); + break; + case 4: + std::fill_n(reinterpret_cast(at), bytes / 4, + static_cast(' ')); + break; + default: + std::fill_n(at, bytes, ' '); + break; + } +} + template void InternalDescriptorUnit::BlankFillOutputRecord() { if constexpr (DIR == Direction::Output) { if (furthestPositionInRecord < recordLength.value_or(furthestPositionInRecord)) { - char *record{CurrentRecord()}; - std::fill_n(record + furthestPositionInRecord, - *recordLength - furthestPositionInRecord, ' '); + BlankFill(CurrentRecord() + furthestPositionInRecord, + *recordLength - 
furthestPositionInRecord); } } } Index: flang/runtime/io-stmt.h =================================================================== --- flang/runtime/io-stmt.h +++ flang/runtime/io-stmt.h @@ -40,7 +40,7 @@ template class InternalFormattedIoStatementState; -template class InternalListIoStatementState; +template class InternalListIoStatementState; template class ExternalFormattedIoStatementState; template class ExternalListIoStatementState; @@ -87,11 +87,7 @@ // Completes an I/O statement and reclaims storage. int EndIoStatement(); - bool Emit(const char *, std::size_t, std::size_t elementBytes); - bool Emit(const char *, std::size_t); - bool Emit(const char16_t *, std::size_t chars); - bool Emit(const char32_t *, std::size_t chars); - template bool EmitEncoded(const CHAR *, std::size_t); + bool Emit(const char *, std::size_t bytes, std::size_t elementBytes = 0); bool Receive(char *, std::size_t, std::size_t elementBytes = 0); std::size_t GetNextInputBytes(const char *&); bool AdvanceRecord(int = 1); @@ -127,15 +123,10 @@ // Vacant after the end of the current record std::optional GetCurrentChar(std::size_t &byteCount); - bool EmitRepeated(char, std::size_t); - bool EmitField(const char *, std::size_t length, std::size_t width); - - // For fixed-width fields, initialize the number of remaining characters. - // Skip over leading blanks, then return the first non-blank character (if - // any). - std::optional PrepareInput( - const DataEdit &edit, std::optional &remaining) { - remaining.reset(); + // For fixed-width fields, return the number of remaining characters. + // Skip over leading blanks. 
+ std::optional CueUpInput(const DataEdit &edit) { + std::optional remaining; if (edit.IsListDirected()) { std::size_t byteCount{0}; GetNextNonBlank(byteCount); @@ -145,7 +136,7 @@ } SkipSpaces(remaining); } - return NextInField(remaining, edit); + return remaining; } std::optional SkipSpaces(std::optional &remaining) { @@ -255,11 +246,8 @@ int EndIoStatement() { return GetIoStat(); } // These are default no-op backstops that can be overridden by descendants. - bool Emit(const char *, std::size_t, std::size_t elementBytes); - bool Emit(const char *, std::size_t); - bool Emit(const char16_t *, std::size_t chars); - bool Emit(const char32_t *, std::size_t chars); - bool Receive(char *, std::size_t, std::size_t elementBytes = 0); + bool Emit(const char *, std::size_t bytes, std::size_t elementBytes = 0); + bool Receive(char *, std::size_t bytes, std::size_t elementBytes = 0); std::size_t GetNextInputBytes(const char *&); bool AdvanceRecord(int); void BackspaceRecord(); @@ -330,22 +318,19 @@ bool imaginaryPart_{false}; }; -template +template class InternalIoStatementState : public IoStatementBase, public IoDirectionState { public: - using CharType = CHAR; using Buffer = - std::conditional_t; + std::conditional_t; InternalIoStatementState(Buffer, std::size_t, const char *sourceFile = nullptr, int sourceLine = 0); InternalIoStatementState( const Descriptor &, const char *sourceFile = nullptr, int sourceLine = 0); int EndIoStatement(); - using IoStatementBase::Emit; - bool Emit( - const CharType *data, std::size_t chars /* not necessarily bytes */); + bool Emit(const char *data, std::size_t bytes, std::size_t elementBytes = 0); std::size_t GetNextInputBytes(const char *&); bool AdvanceRecord(int = 1); void BackspaceRecord(); @@ -361,11 +346,11 @@ template class InternalFormattedIoStatementState - : public InternalIoStatementState, + : public InternalIoStatementState, public FormattedIoStatementState { public: using CharType = CHAR; - using typename 
InternalIoStatementState::Buffer; + using typename InternalIoStatementState::Buffer; InternalFormattedIoStatementState(Buffer internal, std::size_t internalLength, const CharType *format, std::size_t formatLength, const char *sourceFile = nullptr, int sourceLine = 0); @@ -382,17 +367,16 @@ private: IoStatementState ioStatementState_; // points to *this - using InternalIoStatementState::unit_; + using InternalIoStatementState::unit_; // format_ *must* be last; it may be partial someday FormatControl format_; }; -template -class InternalListIoStatementState : public InternalIoStatementState, +template +class InternalListIoStatementState : public InternalIoStatementState, public ListDirectedStatementState { public: - using CharType = CHAR; - using typename InternalIoStatementState::Buffer; + using typename InternalIoStatementState::Buffer; InternalListIoStatementState(Buffer internal, std::size_t internalLength, const char *sourceFile = nullptr, int sourceLine = 0); InternalListIoStatementState( @@ -402,7 +386,7 @@ private: IoStatementState ioStatementState_; // points to *this - using InternalIoStatementState::unit_; + using InternalIoStatementState::unit_; }; class ExternalIoStatementBase : public IoStatementBase { @@ -431,10 +415,7 @@ MutableModes &mutableModes() { return mutableModes_; } void CompleteOperation(); int EndIoStatement(); - bool Emit(const char *, std::size_t, std::size_t elementBytes); - bool Emit(const char *, std::size_t); - bool Emit(const char16_t *, std::size_t chars /* not bytes */); - bool Emit(const char32_t *, std::size_t chars /* not bytes */); + bool Emit(const char *, std::size_t bytes, std::size_t elementBytes = 0); std::size_t GetNextInputBytes(const char *&); bool AdvanceRecord(int = 1); void BackspaceRecord(); @@ -498,10 +479,7 @@ ExternalFileUnit *GetExternalFileUnit() const; void CompleteOperation(); int EndIoStatement(); - bool Emit(const char *, std::size_t, std::size_t elementBytes); - bool Emit(const char *, std::size_t); - bool 
Emit(const char16_t *, std::size_t chars /* not bytes */); - bool Emit(const char32_t *, std::size_t chars /* not bytes */); + bool Emit(const char *, std::size_t bytes, std::size_t elementBytes = 0); std::size_t GetNextInputBytes(const char *&); void HandleRelativePosition(std::int64_t); void HandleAbsolutePosition(std::int64_t); @@ -696,10 +674,7 @@ public: InquireIOLengthState(const char *sourceFile = nullptr, int sourceLine = 0); std::size_t bytes() const { return bytes_; } - bool Emit(const char *, std::size_t, std::size_t elementBytes); - bool Emit(const char *, std::size_t); - bool Emit(const char16_t *, std::size_t chars); - bool Emit(const char32_t *, std::size_t chars); + bool Emit(const char *, std::size_t bytes, std::size_t elementBytes = 0); private: std::size_t bytes_{0}; @@ -735,12 +710,5 @@ ExternalFileUnit *unit_{nullptr}; }; -extern template bool IoStatementState::EmitEncoded( - const char *, std::size_t); -extern template bool IoStatementState::EmitEncoded( - const char16_t *, std::size_t); -extern template bool IoStatementState::EmitEncoded( - const char32_t *, std::size_t); - } // namespace Fortran::runtime::io #endif // FORTRAN_RUNTIME_IO_STMT_H_ Index: flang/runtime/io-stmt.cpp =================================================================== --- flang/runtime/io-stmt.cpp +++ flang/runtime/io-stmt.cpp @@ -8,6 +8,7 @@ #include "io-stmt.h" #include "connection.h" +#include "emit-encoded.h" #include "format.h" #include "tools.h" #include "unit.h" @@ -25,12 +26,6 @@ return false; } -bool IoStatementBase::Emit(const char *, std::size_t) { return false; } - -bool IoStatementBase::Emit(const char16_t *, std::size_t) { return false; } - -bool IoStatementBase::Emit(const char32_t *, std::size_t) { return false; } - std::size_t IoStatementBase::GetNextInputBytes(const char *&p) { p = nullptr; return 0; @@ -82,34 +77,33 @@ decode ? 
decode : "(cannot decode)"); } -template -InternalIoStatementState::InternalIoStatementState( +template +InternalIoStatementState::InternalIoStatementState( Buffer scalar, std::size_t length, const char *sourceFile, int sourceLine) - : IoStatementBase{sourceFile, sourceLine}, unit_{scalar, length} {} + : IoStatementBase{sourceFile, sourceLine}, unit_{scalar, length, 1} {} -template -InternalIoStatementState::InternalIoStatementState( +template +InternalIoStatementState::InternalIoStatementState( const Descriptor &d, const char *sourceFile, int sourceLine) : IoStatementBase{sourceFile, sourceLine}, unit_{d, *this} {} -template -bool InternalIoStatementState::Emit( - const CharType *data, std::size_t chars) { +template +bool InternalIoStatementState::Emit( + const char *data, std::size_t bytes, std::size_t /*elementBytes*/) { if constexpr (DIR == Direction::Input) { Crash("InternalIoStatementState::Emit() called"); return false; } - return unit_.Emit(data, chars * sizeof(CharType), *this); + return unit_.Emit(data, bytes, *this); } -template -std::size_t InternalIoStatementState::GetNextInputBytes( - const char *&p) { +template +std::size_t InternalIoStatementState::GetNextInputBytes(const char *&p) { return unit_.GetNextInputBytes(p, *this); } -template -bool InternalIoStatementState::AdvanceRecord(int n) { +template +bool InternalIoStatementState::AdvanceRecord(int n) { while (n-- > 0) { if (!unit_.AdvanceRecord(*this)) { return false; @@ -118,13 +112,11 @@ return true; } -template -void InternalIoStatementState::BackspaceRecord() { +template void InternalIoStatementState::BackspaceRecord() { unit_.BackspaceRecord(*this); } -template -int InternalIoStatementState::EndIoStatement() { +template int InternalIoStatementState::EndIoStatement() { if constexpr (DIR == Direction::Output) { unit_.EndIoStatement(); // fill } @@ -135,31 +127,28 @@ return result; } -template -void InternalIoStatementState::HandleAbsolutePosition( - std::int64_t n) { +template +void 
InternalIoStatementState::HandleAbsolutePosition(std::int64_t n) { return unit_.HandleAbsolutePosition(n); } -template -void InternalIoStatementState::HandleRelativePosition( - std::int64_t n) { +template +void InternalIoStatementState::HandleRelativePosition(std::int64_t n) { return unit_.HandleRelativePosition(n); } template InternalFormattedIoStatementState::InternalFormattedIoStatementState( - Buffer buffer, std::size_t length, const CHAR *format, + Buffer buffer, std::size_t length, const CharType *format, std::size_t formatLength, const char *sourceFile, int sourceLine) - : InternalIoStatementState{buffer, length, sourceFile, - sourceLine}, + : InternalIoStatementState{buffer, length, sourceFile, sourceLine}, ioStatementState_{*this}, format_{*this, format, formatLength} {} template InternalFormattedIoStatementState::InternalFormattedIoStatementState( - const Descriptor &d, const CHAR *format, std::size_t formatLength, + const Descriptor &d, const CharType *format, std::size_t formatLength, const char *sourceFile, int sourceLine) - : InternalIoStatementState{d, sourceFile, sourceLine}, + : InternalIoStatementState{d, sourceFile, sourceLine}, ioStatementState_{*this}, format_{*this, format, formatLength} {} template @@ -175,20 +164,19 @@ template int InternalFormattedIoStatementState::EndIoStatement() { CompleteOperation(); - return InternalIoStatementState::EndIoStatement(); + return InternalIoStatementState::EndIoStatement(); } -template -InternalListIoStatementState::InternalListIoStatementState( +template +InternalListIoStatementState::InternalListIoStatementState( Buffer buffer, std::size_t length, const char *sourceFile, int sourceLine) - : InternalIoStatementState{buffer, length, sourceFile, - sourceLine}, + : InternalIoStatementState{buffer, length, sourceFile, sourceLine}, ioStatementState_{*this} {} -template -InternalListIoStatementState::InternalListIoStatementState( +template +InternalListIoStatementState::InternalListIoStatementState( const 
Descriptor &d, const char *sourceFile, int sourceLine) - : InternalIoStatementState{d, sourceFile, sourceLine}, + : InternalIoStatementState{d, sourceFile, sourceLine}, ioStatementState_{*this} {} ExternalIoStatementBase::ExternalIoStatementBase( @@ -354,36 +342,6 @@ return unit().Emit(data, bytes, elementBytes, *this); } -template -bool ExternalIoStatementState::Emit(const char *data, std::size_t bytes) { - if constexpr (DIR == Direction::Input) { - Crash("ExternalIoStatementState::Emit(char) called for input statement"); - } - return unit().Emit(data, bytes, 0, *this); -} - -template -bool ExternalIoStatementState::Emit( - const char16_t *data, std::size_t chars) { - if constexpr (DIR == Direction::Input) { - Crash( - "ExternalIoStatementState::Emit(char16_t) called for input statement"); - } - return unit().Emit(reinterpret_cast(data), chars * sizeof *data, - sizeof *data, *this); -} - -template -bool ExternalIoStatementState::Emit( - const char32_t *data, std::size_t chars) { - if constexpr (DIR == Direction::Input) { - Crash( - "ExternalIoStatementState::Emit(char32_t) called for input statement"); - } - return unit().Emit(reinterpret_cast(data), chars * sizeof *data, - sizeof *data, *this); -} - template std::size_t ExternalIoStatementState::GetNextInputBytes(const char *&p) { return unit().GetNextInputBytes(p, *this); @@ -465,45 +423,9 @@ } bool IoStatementState::Emit( - const char *data, std::size_t n, std::size_t elementBytes) { + const char *data, std::size_t bytes, std::size_t elementBytes) { return common::visit( - [=](auto &x) { return x.get().Emit(data, n, elementBytes); }, u_); -} - -bool IoStatementState::Emit(const char *data, std::size_t n) { - return common::visit([=](auto &x) { return x.get().Emit(data, n); }, u_); -} - -bool IoStatementState::Emit(const char16_t *data, std::size_t chars) { - return common::visit([=](auto &x) { return x.get().Emit(data, chars); }, u_); -} - -bool IoStatementState::Emit(const char32_t *data, std::size_t chars) { 
- return common::visit([=](auto &x) { return x.get().Emit(data, chars); }, u_); -} - -template -bool IoStatementState::EmitEncoded(const CHAR *data0, std::size_t chars) { - // Don't allow sign extension - using UnsignedChar = std::make_unsigned_t; - const UnsignedChar *data{reinterpret_cast(data0)}; - if (GetConnectionState().useUTF8()) { - char buffer[256]; - std::size_t at{0}; - while (chars-- > 0) { - auto len{EncodeUTF8(buffer + at, *data++)}; - at += len; - if (at + maxUTF8Bytes > sizeof buffer) { - if (!Emit(buffer, at)) { - return false; - } - at = 0; - } - } - return at == 0 || Emit(buffer, at); - } else { - return Emit(data0, chars); - } + [=](auto &x) { return x.get().Emit(data, bytes, elementBytes); }, u_); } bool IoStatementState::Receive( @@ -534,11 +456,12 @@ } void IoStatementState::CompleteOperation() { - common::visit([](auto &x) { x.get().CompleteOperation(); }, u_); + common::visit([this](auto &x) { x.get().CompleteOperation(); }, u_); } int IoStatementState::EndIoStatement() { - return common::visit([](auto &x) { return x.get().EndIoStatement(); }, u_); + return common::visit( + [this](auto &x) { return x.get().EndIoStatement(); }, u_); } ConnectionState &IoStatementState::GetConnectionState() { @@ -578,7 +501,8 @@ byteCount = 0; return std::nullopt; } else { - if (GetConnectionState().isUTF8) { + const ConnectionState &connection{GetConnectionState()}; + if (connection.isUTF8) { std::size_t length{MeasureUTF8Bytes(*p)}; if (length <= bytes) { if (auto result{DecodeUTF8(p)}) { @@ -588,38 +512,19 @@ } GetIoErrorHandler().SignalError(IostatUTF8Decoding); // Error recovery: return the next byte + } else if (connection.internalIoCharKind > 1) { + byteCount = connection.internalIoCharKind; + if (byteCount == 2) { + return *reinterpret_cast(p); + } else { + return *reinterpret_cast(p); + } } byteCount = 1; return *p; } } -bool IoStatementState::EmitRepeated(char ch, std::size_t n) { - return common::visit( - [=](auto &x) { - for (std::size_t j{0}; j < 
n; ++j) { - if (!x.get().Emit(&ch, 1)) { - return false; - } - } - return true; - }, - u_); -} - -bool IoStatementState::EmitField( - const char *p, std::size_t length, std::size_t width) { - if (width <= 0) { - width = static_cast(length); - } - if (length > static_cast(width)) { - return EmitRepeated('*', width); - } else { - return EmitRepeated(' ', static_cast(width - length)) && - Emit(p, length); - } -} - std::optional IoStatementState::NextInField( std::optional &remaining, const DataEdit &edit) { std::size_t byteCount{0}; @@ -755,7 +660,7 @@ return io.AdvanceRecord(); } if (space) { - return io.Emit(" ", 1); + return EmitAscii(io, " ", 1); } return true; } @@ -928,21 +833,6 @@ return child_.parent().Emit(data, bytes, elementBytes); } -template -bool ChildIoStatementState::Emit(const char *data, std::size_t bytes) { - return child_.parent().Emit(data, bytes); -} - -template -bool ChildIoStatementState::Emit(const char16_t *data, std::size_t chars) { - return child_.parent().Emit(data, chars); -} - -template -bool ChildIoStatementState::Emit(const char32_t *data, std::size_t chars) { - return child_.parent().Emit(data, chars); -} - template std::size_t ChildIoStatementState::GetNextInputBytes(const char *&p) { return child_.parent().GetNextInputBytes(p); @@ -1509,23 +1399,8 @@ const char *sourceFile, int sourceLine) : NoUnitIoStatementState{*this, sourceFile, sourceLine} {} -bool InquireIOLengthState::Emit(const char *, std::size_t n, std::size_t) { - bytes_ += n; - return true; -} - -bool InquireIOLengthState::Emit(const char *p, std::size_t n) { - bytes_ += sizeof *p * n; - return true; -} - -bool InquireIOLengthState::Emit(const char16_t *p, std::size_t n) { - bytes_ += sizeof *p * n; - return true; -} - -bool InquireIOLengthState::Emit(const char32_t *p, std::size_t n) { - bytes_ += sizeof *p * n; +bool InquireIOLengthState::Emit(const char *, std::size_t bytes, std::size_t) { + bytes_ += bytes; return true; } @@ -1537,10 +1412,4 @@ return 
IoStatementBase::EndIoStatement(); } -template bool IoStatementState::EmitEncoded(const char *, std::size_t); -template bool IoStatementState::EmitEncoded( - const char16_t *, std::size_t); -template bool IoStatementState::EmitEncoded( - const char32_t *, std::size_t); - } // namespace Fortran::runtime::io Index: flang/runtime/namelist.cpp =================================================================== --- flang/runtime/namelist.cpp +++ flang/runtime/namelist.cpp @@ -8,6 +8,7 @@ #include "namelist.h" #include "descriptor-io.h" +#include "emit-encoded.h" #include "io-stmt.h" #include "flang/Runtime/io-api.h" #include @@ -34,17 +35,17 @@ // Internal functions to advance records and convert case const auto EmitWithAdvance{[&](char ch) -> bool { return (!connection.NeedAdvance(1) || io.AdvanceRecord()) && - io.Emit(&ch, 1); + EmitAscii(io, &ch, 1); }}; const auto EmitUpperCase{[&](const char *str) -> bool { if (connection.NeedAdvance(std::strlen(str)) && - !(io.AdvanceRecord() && io.Emit(" ", 1))) { + !(io.AdvanceRecord() && EmitAscii(io, " ", 1))) { return false; } for (; *str; ++str) { char up{*str >= 'a' && *str <= 'z' ? static_cast(*str - 'a' + 'A') : *str}; - if (!io.Emit(&up, 1)) { + if (!EmitAscii(io, &up, 1)) { return false; } } @@ -141,7 +142,6 @@ static bool HandleSubscripts(IoStatementState &io, Descriptor &desc, const Descriptor &source, const char *name) { IoErrorHandler &handler{io.GetIoErrorHandler()}; - io.HandleRelativePosition(1); // skip '(' // Allow for blanks in subscripts; they're nonstandard, but not // ambiguous within the parentheses. SubscriptValue lower[maxRank], upper[maxRank], stride[maxRank]; @@ -251,7 +251,6 @@ SubscriptValue chars{static_cast(desc.ElementBytes()) / kind}; // Allow for blanks in substring bounds; they're nonstandard, but not // ambiguous within the parentheses. 
- io.HandleRelativePosition(1); // skip '(' std::optional lower, upper; std::size_t byteCount{0}; std::optional ch{io.GetNextNonBlank(byteCount)}; @@ -304,7 +303,6 @@ static bool HandleComponent(IoStatementState &io, Descriptor &desc, const Descriptor &source, const char *name) { IoErrorHandler &handler{io.GetIoErrorHandler()}; - io.HandleRelativePosition(1); // skip '%' char compName[nameBufferSize]; if (GetLowerCaseName(io, compName, sizeof compName)) { const DescriptorAddendum *addendum{source.Addendum()}; @@ -436,6 +434,7 @@ do { Descriptor &mutableDescriptor{staticDesc[whichStaticDesc].descriptor()}; whichStaticDesc ^= 1; + io.HandleRelativePosition(byteCount); // skip over '(' or '%' if (*next == '(') { if (!hadSubstring && (hadSubscripts || useDescriptor->rank() == 0)) { mutableDescriptor = *useDescriptor; @@ -449,9 +448,11 @@ "NAMELIST group '%s'", name, group.groupName); return false; - } else if (!HandleSubscripts( - io, mutableDescriptor, *useDescriptor, name)) { - return false; + } else { + if (!HandleSubscripts( + io, mutableDescriptor, *useDescriptor, name)) { + return false; + } } hadSubscripts = true; } else { @@ -488,7 +489,7 @@ "No '/' found after NAMELIST group '%s'", group.groupName); return false; } - io.HandleRelativePosition(1); + io.HandleRelativePosition(byteCount); return true; } Index: flang/test/Semantics/io03.f90 =================================================================== --- flang/test/Semantics/io03.f90 +++ flang/test/Semantics/io03.f90 @@ -55,10 +55,7 @@ decimal='comma', end=9, eor=9, err=9, id=id, iomsg=msg, iostat=stat2, & pad='no', round='processor_defined', size=kk) jj - !ERROR: Invalid character kind for an internal file variable read(internal_file2, *) jj - - !ERROR: Invalid character kind for an internal file variable read(internal_file4, *) jj !ERROR: Internal file must not have a vector subscript Index: flang/unittests/Runtime/Format.cpp =================================================================== --- 
flang/unittests/Runtime/Format.cpp +++ flang/unittests/Runtime/Format.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "CrashHandlerFixture.h" +#include "../runtime/connection.h" #include "../runtime/format-implementation.h" #include "../runtime/io-error.h" #include @@ -24,33 +25,25 @@ public: using CharType = char; TestFormatContext() : IoErrorHandler{"format.cpp", 1} {} - bool Emit(const char *, std::size_t); - bool Emit(const char16_t *, std::size_t); - bool Emit(const char32_t *, std::size_t); + bool Emit(const char *, std::size_t, std::size_t = 0); bool AdvanceRecord(int = 1); void HandleRelativePosition(std::int64_t); void HandleAbsolutePosition(std::int64_t); void Report(const DataEdit &); ResultsTy results; MutableModes &mutableModes() { return mutableModes_; } + ConnectionState &GetConnectionState() { return connectionState_; } private: MutableModes mutableModes_; + ConnectionState connectionState_; }; -bool TestFormatContext::Emit(const char *s, std::size_t len) { +bool TestFormatContext::Emit(const char *s, std::size_t len, std::size_t) { std::string str{s, len}; results.push_back("'"s + str + '\''); return true; } -bool TestFormatContext::Emit(const char16_t *, std::size_t) { - Crash("TestFormatContext::Emit(const char16_t *) called"); - return false; -} -bool TestFormatContext::Emit(const char32_t *, std::size_t) { - Crash("TestFormatContext::Emit(const char32_t *) called"); - return false; -} bool TestFormatContext::AdvanceRecord(int n) { while (n-- > 0) {