diff --git a/llvm/include/llvm/Support/AutoConvert.h b/llvm/include/llvm/Support/AutoConvert.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Support/AutoConvert.h
@@ -0,0 +1,53 @@
+//===- AutoConvert.h - Auto conversion between ASCII/EBCDIC -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions used for auto conversion between
+// ASCII/EBCDIC codepages specific to z/OS.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_AUTOCONVERT_H
+#define LLVM_SUPPORT_AUTOCONVERT_H
+
+#ifdef __MVS__
+#define CCSID_IBM_1047 1047
+#define CCSID_UTF_8 1208
+#include <system_error>
+
+namespace llvm {
+
+/// Disable the z/OS enhanced ASCII auto-conversion for the file
+/// descriptor.
+///
+/// \returns a success (default-constructed) error code, or an error code
+/// built from errno if the underlying fcntl() call fails.
+std::error_code disableAutoConversion(int FD);
+
+/// Query the z/OS enhanced ASCII auto-conversion status of a file
+/// descriptor and force the conversion if the file is not tagged with a
+/// codepage.
+///
+/// \returns a success (default-constructed) error code, or an error code
+/// built from errno if either underlying fcntl() call fails.
+std::error_code enableAutoConversion(int FD);
+
+/// Set the tag information for a file descriptor.
+///
+/// \param FD the file descriptor to tag.
+/// \param CCSID the coded character set identifier to store in the tag.
+/// \param Text whether the file is tagged as text; text files must not use
+/// the FT_UNTAGGED or FT_BINARY CCSIDs.
+/// \returns a success (default-constructed) error code, or an error code
+/// built from errno if the underlying fcntl() call fails.
+std::error_code setFileTag(int FD, int CCSID, bool Text);
+
+} // namespace llvm
+
+#endif // __MVS__
+
+#endif // LLVM_SUPPORT_AUTOCONVERT_H
diff --git a/llvm/lib/Support/AutoConvert.cpp b/llvm/lib/Support/AutoConvert.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Support/AutoConvert.cpp
@@ -0,0 +1,76 @@
+//===- AutoConvert.cpp - Auto conversion between ASCII/EBCDIC -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions used for auto conversion between
+// ASCII/EBCDIC codepages specific to z/OS.
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef __MVS__
+
+#include "llvm/Support/AutoConvert.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cerrno>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+// Translate the return value of fcntl() into a std::error_code: fcntl()
+// signals failure by returning -1 with the reason stored in errno; any other
+// value is treated as success.
+static std::error_code fcntlResultToErrorCode(int RC) {
+  if (RC == -1)
+    return std::error_code(errno, std::generic_category());
+  return std::error_code();
+}
+
+std::error_code llvm::disableAutoConversion(int FD) {
+  static const struct f_cnvrt Convert = {
+      SETCVTOFF,                     // cvtcmd
+      0,                             // pccsid
+      static_cast<short>(FT_BINARY), // fccsid
+  };
+  return fcntlResultToErrorCode(fcntl(FD, F_CONTROL_CVT, &Convert));
+}
+
+std::error_code llvm::enableAutoConversion(int FD) {
+  struct f_cnvrt Query = {
+      QUERYCVT, // cvtcmd
+      0,        // pccsid
+      0,        // fccsid
+  };
+
+  // Query first so that Query.fccsid can be inspected below; bail out with the
+  // errno-based error if the query itself fails.
+  if (auto EC = fcntlResultToErrorCode(fcntl(FD, F_CONTROL_CVT, &Query)))
+    return EC;
+
+  Query.cvtcmd = SETCVTALL;
+  Query.pccsid =
+      (FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO)
+          ? 0
+          : CCSID_UTF_8;
+  // Assume untagged files to be IBM-1047 encoded.
+  Query.fccsid = (Query.fccsid == FT_UNTAGGED) ? CCSID_IBM_1047 : Query.fccsid;
+  return fcntlResultToErrorCode(fcntl(FD, F_CONTROL_CVT, &Query));
+}
+
+std::error_code llvm::setFileTag(int FD, int CCSID, bool Text) {
+  assert((!Text || (CCSID != FT_UNTAGGED && CCSID != FT_BINARY)) &&
+         "FT_UNTAGGED and FT_BINARY are not allowed for text files");
+  struct file_tag Tag;
+  Tag.ft_ccsid = CCSID;
+  Tag.ft_txtflag = Text;
+  Tag.ft_deferred = 0;
+  Tag.ft_rsvflags = 0;
+
+  return fcntlResultToErrorCode(fcntl(FD, F_SETTAG, &Tag));
+}
+
+#endif // __MVS__
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -93,6 +93,7 @@
   ARMAttributeParser.cpp
   ARMWinEH.cpp
   Allocator.cpp
+  AutoConvert.cpp
   BinaryStreamError.cpp
   BinaryStreamReader.cpp
   BinaryStreamRef.cpp
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -13,6 +13,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/AutoConvert.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Errno.h"
 #include "llvm/Support/FileSystem.h"
@@ -467,6 +468,12 @@
       return std::move(Result);
   }
 
+#ifdef __MVS__
+  // Set codepage auto-conversion for z/OS.
+  if (auto EC = llvm::enableAutoConversion(FD))
+    return EC;
+#endif
+
   auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
   if (!Buf) {
     // Failed to create a buffer. The only way it can fail is if
diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc
--- a/llvm/lib/Support/Unix/Path.inc
+++ b/llvm/lib/Support/Unix/Path.inc
@@ -50,6 +50,7 @@
 #elif defined(__DragonFly__)
 #include <sys/mount.h>
 #elif defined(__MVS__)
+#include "llvm/Support/AutoConvert.h"
 #include <sys/ps.h>
 #endif
 
@@ -959,8 +960,13 @@
     // Nothing special, just don't add O_CREAT and we get these semantics.
   }
 
+// Using append mode with z/OS UTF-8 auto-conversion results in EINVAL when
+// calling write(). Instead we need to use lseek() to set offset to EOF after
+// open().
+#ifndef __MVS__
   if (Flags & OF_Append)
     Result |= O_APPEND;
+#endif
 
 #ifdef O_CLOEXEC
   if (!(Flags & OF_ChildInherit))
@@ -989,6 +995,88 @@
     assert(r == 0 && "fcntl(F_SETFD, FD_CLOEXEC) failed");
   }
 #endif
+
+#ifdef __MVS__
+  /* Reason about auto-conversion and file tags. Setting the file tag only
+   * applies if file is opened in write mode:
+   *
+   * Text file:
+   *                  File exists       File created
+   * CD_CreateNew     n/a               conv: on
+   *                                    tag: set 1047
+   * CD_CreateAlways  conv: auto        conv: on
+   *                  tag: auto 1047    tag: set 1047
+   * CD_OpenAlways    conv: auto        conv: on
+   *                  tag: auto 1047    tag: set 1047
+   * CD_OpenExisting  conv: auto        n/a
+   *                  tag: unchanged
+   *
+   * Binary file:
+   *                  File exists       File created
+   * CD_CreateNew     n/a               conv: off
+   *                                    tag: set binary
+   * CD_CreateAlways  conv: off         conv: off
+   *                  tag: auto binary  tag: set binary
+   * CD_OpenAlways    conv: off         conv: off
+   *                  tag: auto binary  tag: set binary
+   * CD_OpenExisting  conv: off         n/a
+   *                  tag: unchanged
+   *
+   * Actions:
+   *   conv: off        -> auto-conversion is turned off
+   *   conv: on         -> auto-conversion is turned on
+   *   conv: auto       -> auto-conversion is turned on if the file is untagged
+   *   tag: set 1047    -> set the file tag to text encoded in 1047
+   *   tag: set binary  -> set the file tag to binary
+   *   tag: auto 1047   -> set file tag to 1047 if not set
+   *   tag: auto binary -> set file tag to binary if not set
+   *   tag: unchanged   -> do not care about the file tag
+   *
+   * It is not possible to distinguish between the cases "file exists" and
+   * "file created". In the latter case, the file tag is not set and the file
+   * size is zero. The decision table boils down to:
+   *
+   * the file tag is set if
+   *   - the file is opened for writing
+   *   - the create disposition is not equal to CD_OpenExisting
+   *   - the file tag is not set
+   *   - the file size is zero
+   *
+   * This only applies if the file is a regular file. E.g. enabling
+   * auto-conversion for reading from /dev/null results in error EINVAL when
+   * calling read().
+   *
+   * Using append mode with z/OS UTF-8 auto-conversion results in EINVAL when
+   * calling write(). Instead we need to use lseek() to set offset to EOF after
+   * open().
+   */
+  if ((Flags & OF_Append) && lseek(ResultFD, 0, SEEK_END) == -1)
+    return std::error_code(errno, std::generic_category());
+  struct stat Stat;
+  if (fstat(ResultFD, &Stat) == -1)
+    return std::error_code(errno, std::generic_category());
+  if (S_ISREG(Stat.st_mode)) {
+    bool DoSetTag = (Access & FA_Write) && (Disp != CD_OpenExisting) &&
+                    !Stat.st_tag.ft_txtflag && !Stat.st_tag.ft_ccsid &&
+                    Stat.st_size == 0;
+    if (Flags & OF_Text) {
+      if (auto EC = llvm::enableAutoConversion(ResultFD))
+        return EC;
+      if (DoSetTag) {
+        if (auto EC = llvm::setFileTag(ResultFD, CCSID_IBM_1047, true))
+          return EC;
+      }
+    } else {
+      if (auto EC = llvm::disableAutoConversion(ResultFD))
+        return EC;
+      if (DoSetTag) {
+        if (auto EC = llvm::setFileTag(ResultFD, FT_BINARY, false))
+          return EC;
+      }
+    }
+  }
+#endif
+
   return std::error_code();
 }
 
diff --git a/llvm/test/Support/encoding.ll b/llvm/test/Support/encoding.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Support/encoding.ll
@@ -0,0 +1,10 @@
+; Checks if llc can deal with different char encodings.
+; This is only required for z/OS.
+;
+; UNSUPPORTED: !s390x-none-zos
+;
+; RUN: cat %s >%t && chtag -tc ISO8859-1 %t && llc %t -o - >/dev/null
+; RUN: iconv -f ISO8859-1 -t IBM-1047 <%s >%t && chtag -tc IBM-1047 %t && llc %t -o - >/dev/null
+; RUN: iconv -f ISO8859-1 -t IBM-1047 <%s >%t && chtag -r %t && llc %t -o - >/dev/null
+
+@g_105 = external dso_local global i8, align 2