diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -303,6 +303,10 @@
   /// input is a header file (i.e. -x c-header).
   bool IsHeaderFile = false;
 
+  /// Name of the execution charset that the internal charset is converted to.
+  /// Filled in from the -fexec-charset cc1 option; empty if it was not given.
+  std::string ExecCharset;
+
   LangOptions();
 
   // Define accessors/mutators for language options of enumeration type.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3580,6 +3580,8 @@
 
 let Flags = [CC1Option, CC1AsOption, NoDriverOption] in {
 
+def fexec_charset : Separate<["-"], "fexec-charset">, MetaVarName<"<charset>">,
+  HelpText<"Set the execution <charset> for string and character literals">;
 def target_cpu : Separate<["-"], "target-cpu">,
   HelpText<"Target a specific cpu type">;
 def tune_cpu : Separate<["-"], "tune-cpu">,
diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -436,6 +436,12 @@
   /// @name Preprocessor
   /// {
 
+  /// Set up the charset conversion tables used for literals from the
+  /// language options in \p Opts and the target described by \p TInfo.
+  /// Problems are reported through \p Diags.
+  static void SetTranslationTables(LangOptions &Opts, clang::TargetInfo &TInfo,
+                                   clang::DiagnosticsEngine &Diags);
+
   bool hasPreprocessor() const { return PP != nullptr; }
 
   /// Return the current preprocessor.
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -17,10 +17,12 @@
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/LiteralTranslator.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CharSet.h"
 #include "llvm/Support/DataTypes.h"
 
 namespace clang {
@@ -184,9 +186,10 @@
   SmallString<32> UDSuffixBuf;
   unsigned UDSuffixOffset;
 public:
-  CharLiteralParser(const char *begin, const char *end,
-                    SourceLocation Loc, Preprocessor &PP,
-                    tok::TokenKind kind);
+  CharLiteralParser(const char *begin, const char *end, SourceLocation Loc,
+                    Preprocessor &PP, tok::TokenKind kind,
+                    ConversionState translationState = TranslateToExecCharset);
+  ConversionState TranslationState;
 
   bool hadError() const { return HadError; }
   bool isAscii() const { return Kind == tok::char_constant; }
@@ -222,21 +225,25 @@
   unsigned UDSuffixToken;
   unsigned UDSuffixOffset;
 public:
-  StringLiteralParser(ArrayRef<Token> StringToks,
-                      Preprocessor &PP, bool Complain = true);
-  StringLiteralParser(ArrayRef<Token> StringToks,
-                      const SourceManager &sm, const LangOptions &features,
-                      const TargetInfo &target,
-                      DiagnosticsEngine *diags = nullptr)
-    : SM(sm), Features(features), Target(target), Diags(diags),
-      MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
-      ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
+  StringLiteralParser(
+      ArrayRef<Token> StringToks, Preprocessor &PP, bool Complain = true,
+      ConversionState translationState = TranslateToExecCharset);
+  StringLiteralParser(ArrayRef<Token> StringToks, const SourceManager &sm,
+                      const LangOptions &features, const TargetInfo &target,
+                      DiagnosticsEngine *diags = nullptr,
+                      ConversionState translation = TranslateToExecCharset)
+      : SM(sm), Features(features), Target(target), Diags(diags),
+        MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
+        ResultPtr(ResultBuf.data()), hadError(false), Pascal(false),
+        TranslationState(translation) {
     init(StringToks);
   }
 
-
   bool hadError;
   bool Pascal;
+  ConversionState TranslationState;
+
+  static LiteralTranslator Translator;
 
   StringRef GetString() const {
     return StringRef(ResultBuf.data(), GetStringLength());
diff --git a/clang/include/clang/Lex/LiteralTranslator.h b/clang/include/clang/Lex/LiteralTranslator.h
new file mode 100644
--- /dev/null
+++ b/clang/include/clang/Lex/LiteralTranslator.h
@@ -0,0 +1,56 @@
+//===--- clang/Lex/LiteralTranslator.h - Translator for Literals -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_LITERALTRANSLATOR_H
+#define LLVM_CLANG_LEX_LITERALTRANSLATOR_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CharSet.h"
+
+/// Selects which charset name is used to look up a literal's converter.
+enum ConversionState {
+  NoTranslation,
+  TranslateToSystemCharset,
+  TranslateToExecCharset
+};
+
+/// Result of looking up or creating a charset converter.
+enum CharsetTableStatusCode {
+  CharsetTableOk = 1,
+  InvalidCharsetTable,
+};
+
+/// Holds the charset names used for literal conversion together with the
+/// converters from the internal charset to each registered charset.
+class LiteralTranslator {
+public:
+  /// Charset that literals are converted from.
+  static llvm::StringRef InternalCharset;
+  /// Charset looked up for TranslateToSystemCharset.
+  static llvm::StringRef SystemCharset;
+  /// Charset looked up for TranslateToExecCharset.
+  static llvm::StringRef ExecCharset;
+  /// Converters from InternalCharset, keyed by destination charset name.
+  static llvm::StringMap<llvm::CharSetConverter> ExecCharsetTables;
+
+  /// Returns the converter registered for \p Codepage, or nullptr if none.
+  static llvm::CharSetConverter *getConversionTable(const char *Codepage);
+
+  /// Makes sure a converter from InternalCharset to \p To is registered.
+  /// \returns CharsetTableOk if the converter exists or was created, and
+  /// InvalidCharsetTable if it could not be created.
+  static CharsetTableStatusCode findOrCreateExecCharsetTable(const char *To);
+
+  /// Returns the converter for the charset selected by \p TranslationState,
+  /// or nullptr if no converter is registered for that charset.
+  llvm::CharSetConverter *
+  getCharConversionTable(ConversionState TranslationState);
+};
+
+#endif
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5966,12 +5966,15 @@
                                           << value;
   }
 
-  // -fexec_charset=UTF-8 is default. Reject others
-  if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) {
-    StringRef value = execCharset->getValue();
-    if (!value.equals_lower("utf-8"))
-      D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args)
-                                          << value;
+  // Pass all -fexec-charset options to cc1.
+  std::vector<std::string> vList =
+      Args.getAllArgValues(options::OPT_fexec_charset_EQ);
+  // Set the default fexec-charset as the system charset.
+  CmdArgs.push_back("-fexec-charset");
+  CmdArgs.push_back(Args.MakeArgString(Triple.getSystemCharset()));
+  for (const std::string &Charset : vList) {
+    CmdArgs.push_back("-fexec-charset");
+    CmdArgs.push_back(Args.MakeArgString(Charset));
   }
 
   RenderDiagnosticsOptions(D, Args, CmdArgs);
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -12,6 +12,7 @@
 #include "clang/AST/Decl.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticDriver.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/LangStandard.h"
 #include "clang/Basic/SourceManager.h"
@@ -29,6 +30,7 @@
 #include "clang/Frontend/Utils.h"
 #include "clang/Frontend/VerifyDiagnosticConsumer.h"
 #include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/LiteralTranslator.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/PreprocessorOptions.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
@@ -158,6 +160,38 @@
   ModuleDepCollector = std::move(Collector);
 }
 
+/// Records the target's system charset and the -fexec-charset value in
+/// LiteralTranslator and creates the converters for them. Reports an invalid
+/// value error through \p Diags if the exec charset converter cannot be made.
+void CompilerInstance::SetTranslationTables(LangOptions &Opts,
+                                            TargetInfo &TInfo,
+                                            DiagnosticsEngine &Diags) {
+  LiteralTranslator::SystemCharset = TInfo.getTriple().getSystemCharset();
+
+  // Create translation table between internal and system charset
+  if (!LiteralTranslator::InternalCharset.equals(
+          LiteralTranslator::SystemCharset)) {
+    LiteralTranslator::findOrCreateExecCharsetTable(
+        LiteralTranslator::SystemCharset.data());
+  }
+  // Create translation table between internal and exec charset specified
+  // in fexec-charset option.
+  if (Opts.ExecCharset.empty())
+    LiteralTranslator::ExecCharset = LiteralTranslator::InternalCharset;
+  else {
+    LiteralTranslator::ExecCharset = Opts.ExecCharset;
+    if (LiteralTranslator::InternalCharset.equals(
+            LiteralTranslator::ExecCharset))
+      return;
+    CharsetTableStatusCode RC = LiteralTranslator::findOrCreateExecCharsetTable(
+        LiteralTranslator::ExecCharset.data());
+
+    if (RC != CharsetTableOk)
+      Diags.Report(diag::err_drv_invalid_value)
+          << "-fexec-charset" << LiteralTranslator::ExecCharset;
+  }
+}
+
 static void collectHeaderMaps(const HeaderSearch &HS,
                               std::shared_ptr<ModuleDependencyCollector> MDC) {
   SmallVector<std::string, 4> HeaderMapFileNames;
@@ -924,6 +958,8 @@
   if (!hasTarget())
     return false;
 
+  SetTranslationTables(getLangOpts(), getTarget(), getDiagnostics());
+
   // Create TargetInfo for the other side of CUDA/OpenMP/SYCL compilation.
   if ((getLangOpts().CUDA || getLangOpts().OpenMPIsDevice ||
        getLangOpts().SYCLIsDevice) &&
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3567,6 +3567,11 @@
       Args.hasFlag(OPT_fexperimental_relative_cxx_abi_vtables,
                    OPT_fno_experimental_relative_cxx_abi_vtables,
                    /*default=*/false);
+
+  if (const Arg *ExecCharset = Args.getLastArg(OPT_fexec_charset)) {
+    StringRef Value = ExecCharset->getValue();
+    Opts.ExecCharset = Value.str();
+  }
 }
 
 static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) {
diff --git a/clang/lib/Lex/CMakeLists.txt b/clang/lib/Lex/CMakeLists.txt
--- a/clang/lib/Lex/CMakeLists.txt
+++ b/clang/lib/Lex/CMakeLists.txt
@@ -8,6 +8,7 @@
   HeaderSearch.cpp
   Lexer.cpp
   LiteralSupport.cpp
+  LiteralTranslator.cpp
   MacroArgs.cpp
   MacroInfo.cpp
   ModuleMap.cpp
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -93,7 +93,8 @@
                                   const char *ThisTokEnd, bool &HadError,
                                   FullSourceLoc Loc, unsigned CharWidth,
                                   DiagnosticsEngine *Diags,
-                                  const LangOptions &Features) {
+                                  const LangOptions &Features,
+                                  llvm::CharSetConverter *Converter) {
   const char *EscapeBegin = ThisTokBuf;
 
   // Skip the '\' char.
@@ -102,6 +103,7 @@
   // We know that this character can't be off the end of the buffer, because
   // that would have been \", which would not have been the end of string.
   unsigned ResultChar = *ThisTokBuf++;
+  bool Translate = true;
   switch (ResultChar) {
   // These map to themselves.
   case '\\': case '\'': case '"': case '?': break;
@@ -142,6 +144,7 @@
     ResultChar = 11;
     break;
   case 'x': { // Hex escape.
+    Translate = false;
     ResultChar = 0;
     if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
       if (Diags)
@@ -179,6 +182,7 @@
   case '4': case '5': case '6': case '7': {
     // Octal escapes.
     --ThisTokBuf;
+    Translate = false;
     ResultChar = 0;
 
     // Octal escapes are a series of octal digits with maximum length 3.
@@ -224,6 +228,16 @@
     break;
   }
 
+  // Only a character that fits in one byte can be converted. Hand exactly
+  // that byte to the converter: reading &ResultChar as a C string depends on
+  // host endianness, and copying sizeof(unsigned) bytes back over-reads.
+  if (Translate && Converter && ResultChar < 0x100) {
+    const char SourceChar = static_cast<char>(ResultChar);
+    SmallString<8> ResultCharConv;
+    Converter->convert(StringRef(&SourceChar, 1), ResultCharConv);
+    if (ResultCharConv.size() == 1)
+      ResultChar = static_cast<unsigned char>(ResultCharConv[0]);
+  }
   return ResultChar;
 }
 
@@ -1236,10 +1250,11 @@
 ///
 CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
                                      SourceLocation Loc, Preprocessor &PP,
-                                     tok::TokenKind kind) {
+                                     tok::TokenKind kind,
+                                     ConversionState translationState) {
   // At this point we know that the character matches the regex "(L|u|U)?'.*'".
   HadError = false;
-
+  TranslationState = translationState;
   Kind = kind;
 
   const char *TokBegin = begin;
@@ -1302,6 +1317,9 @@
     largest_character_for_kind = 0x7Fu;
   }
 
+  llvm::CharSetConverter *Converter =
+      StringLiteralParser::Translator.getCharConversionTable(TranslationState);
+
   while (begin != end) {
     // Is this a span of non-escape characters?
     if (begin[0] != '\\') {
@@ -1339,6 +1357,11 @@
             HadError = true;
             PP.Diag(Loc, diag::err_character_too_large);
           }
+          if (!HadError && Converter) {
+            SmallString<1> Conv;
+            Converter->convert(StringRef((char *)tmp_out_start, 1), Conv);
+            memmove((void *)tmp_out_start, Conv.data(), 1);
+          }
         }
       }
 
@@ -1361,9 +1384,9 @@
     }
     unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
     uint64_t result =
-      ProcessCharEscape(TokBegin, begin, end, HadError,
-                        FullSourceLoc(Loc,PP.getSourceManager()),
-                        CharWidth, &PP.getDiagnostics(), PP.getLangOpts());
+        ProcessCharEscape(TokBegin, begin, end, HadError,
+                          FullSourceLoc(Loc, PP.getSourceManager()), CharWidth,
+                          &PP.getDiagnostics(), PP.getLangOpts(), Converter);
     *buffer_begin++ = result;
   }
 
@@ -1471,13 +1494,17 @@
 ///       hex-digit hex-digit hex-digit hex-digit
 /// \endverbatim
 ///
-StringLiteralParser::
-StringLiteralParser(ArrayRef<Token> StringToks,
-                    Preprocessor &PP, bool Complain)
-  : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
-    Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() :nullptr),
-    MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
-    ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
+LiteralTranslator StringLiteralParser::Translator;
+
+StringLiteralParser::StringLiteralParser(ArrayRef<Token> StringToks,
+                                         Preprocessor &PP, bool Complain,
+                                         ConversionState translationState)
+    : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
+      Target(PP.getTargetInfo()),
+      Diags(Complain ? &PP.getDiagnostics() : nullptr), MaxTokenLength(0),
+      SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
+      ResultPtr(ResultBuf.data()), hadError(false), Pascal(false),
+      TranslationState(translationState) {
   init(StringToks);
 }
 
@@ -1557,6 +1584,9 @@
 
   SourceLocation UDSuffixTokLoc;
 
+  ConversionState State = TranslationState;
+  llvm::CharSetConverter *Converter = Translator.getCharConversionTable(State);
+
   for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
     const char *ThisTokBuf = &TokenBuf[0];
     // Get the spelling of the token, which eliminates trigraphs, etc. We know
@@ -1652,6 +1682,13 @@
           if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
             hadError = true;
 
+          if (!hadError && Converter) {
+            SmallString<256> CpConv;
+            int ResultLength = BeforeCRLF.size() * CharByteWidth;
+            unsigned char *Cp = (unsigned char *)ResultPtr - ResultLength;
+            Converter->convert(StringRef((char *)Cp, ResultLength), CpConv);
+            memmove(Cp, CpConv.data(), ResultLength);
+          }
           // Point into the \n inside the \r\n sequence and operate on the
           // remaining portion of the literal.
           RemainingTokenSpan = AfterCRLF.substr(1);
@@ -1685,10 +1722,19 @@
             ++ThisTokBuf;
           } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
 
+          int Length = ThisTokBuf - InStart;
           // Copy the character span over.
           if (CopyStringFragment(StringToks[i], ThisTokBegin,
                                  StringRef(InStart, ThisTokBuf - InStart)))
             hadError = true;
+
+          if (!hadError && Converter) {
+            SmallString<256> CpConv;
+            int ResultLength = Length * CharByteWidth;
+            unsigned char *Cp = (unsigned char *)ResultPtr - ResultLength;
+            Converter->convert(StringRef((char *)Cp, ResultLength), CpConv);
+            memmove(Cp, CpConv.data(), ResultLength);
+          }
           continue;
         }
         // Is this a Universal Character Name escape?
@@ -1701,9 +1747,9 @@
         }
         // Otherwise, this is a non-UCN escape character. Process it.
         unsigned ResultChar =
-          ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
-                            FullSourceLoc(StringToks[i].getLocation(), SM),
-                            CharByteWidth*8, Diags, Features);
+            ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
+                              FullSourceLoc(StringToks[i].getLocation(), SM),
+                              CharByteWidth * 8, Diags, Features, Converter);
 
         if (CharByteWidth == 4) {
           // FIXME: Make the type of the result buffer correct instead of
@@ -1872,6 +1918,9 @@
   assert(SpellingPtr[0] == '"' && "Should be a string literal!");
   ++SpellingPtr;
 
+  ConversionState State = TranslationState;
+  llvm::CharSetConverter *Converter = Translator.getCharConversionTable(State);
+
   // Skip over bytes until we find the offset we're looking for.
   while (ByteNo) {
     assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
@@ -1897,8 +1946,8 @@
       ByteNo -= Len;
     } else {
       ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
-                        FullSourceLoc(Tok.getLocation(), SM),
-                        CharByteWidth*8, Diags, Features);
+                        FullSourceLoc(Tok.getLocation(), SM), CharByteWidth * 8,
+                        Diags, Features, Converter);
       --ByteNo;
     }
     assert(!HadError && "This method isn't valid on erroneous strings");
diff --git a/clang/lib/Lex/LiteralTranslator.cpp b/clang/lib/Lex/LiteralTranslator.cpp
new file mode 100644
--- /dev/null
+++ b/clang/lib/Lex/LiteralTranslator.cpp
@@ -0,0 +1,54 @@
+//===--- LiteralTranslator.cpp - Translator for String Literals -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/LiteralTranslator.h"
+
+using namespace llvm;
+
+StringRef LiteralTranslator::InternalCharset = "UTF-8";
+StringRef LiteralTranslator::SystemCharset = "";
+StringRef LiteralTranslator::ExecCharset = "";
+llvm::StringMap<llvm::CharSetConverter> LiteralTranslator::ExecCharsetTables;
+
+/// Returns the converter registered for \p Codepage, or nullptr if none.
+llvm::CharSetConverter *
+LiteralTranslator::getConversionTable(const char *Codepage) {
+  auto TableIter = ExecCharsetTables.find(Codepage);
+  if (TableIter == ExecCharsetTables.end())
+    return nullptr;
+  return &TableIter->second;
+}
+
+/// Makes sure a converter from the internal charset to \p To is registered,
+/// creating it on first use.
+CharsetTableStatusCode
+LiteralTranslator::findOrCreateExecCharsetTable(const char *To) {
+  // Reuse the converter if one was already created for this charset.
+  if (getConversionTable(To))
+    return CharsetTableOk;
+
+  ErrorOr<CharSetConverter> ErrorOrConverter =
+      llvm::CharSetConverter::create(InternalCharset.data(), To);
+  if (!ErrorOrConverter)
+    return InvalidCharsetTable;
+  ExecCharsetTables.insert_or_assign(StringRef(To),
+                                     std::move(*ErrorOrConverter));
+  return CharsetTableOk;
+}
+
+/// Returns the converter for the charset selected by \p TranslationState, or
+/// nullptr if no converter is registered for that charset.
+llvm::CharSetConverter *
+LiteralTranslator::getCharConversionTable(ConversionState TranslationState) {
+  StringRef CodePage = InternalCharset;
+  if (TranslationState == TranslateToSystemCharset)
+    CodePage = SystemCharset;
+  else if (TranslationState == TranslateToExecCharset)
+    CodePage = ExecCharset;
+  return getConversionTable(CodePage.data());
+}
diff --git a/clang/test/CodeGen/systemz-charset.c b/clang/test/CodeGen/systemz-charset.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/systemz-charset.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 %s -emit-llvm -triple s390x-none-zos -fexec-charset IBM-1047 -o - | FileCheck %s
+// RUN: %clang %s -emit-llvm -S -target s390x-ibm-zos -o - | FileCheck %s
+
+char *UpperCaseLetters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+// CHECK: c"\C1\C2\C3\C4\C5\C6\C7\C8\C9\D1\D2\D3\D4\D5\D6\D7\D8\D9\E2\E3\E4\E5\E6\E7\E8\E9\00"
+
+char *LowerCaseLetters = "abcdefghijklmnopqrstuvwxyz";
+// CHECK: c"\81\82\83\84\85\86\87\88\89\91\92\93\94\95\96\97\98\99\A2\A3\A4\A5\A6\A7\A8\A9\00"
+
+char *Digits = "0123456789";
+// CHECK: c"\F0\F1\F2\F3\F4\F5\F6\F7\F8\F9\00"
+
+char *SpecialCharacters = " .<(+|&!$*);^-/,%%_>`:#@=";
+// CHECK: c"@KLMNOPZ[\\]^_`akllmnyz{|~\00"
+
+char *EscapeCharacters = "\a\b\f\n\r\t\v\\\'\"\?";
+// CHECK: c"/\16\0C\15\0D\05\0B\E0}\7Fo\00"
+
+char *HexCharacters = "\x12\x13\x14";
+// CHECK: c"\12\13\14\00"
+
+char *OctalCharacters = "\141\142\143";
+// CHECK: c"abc\00"
+
+char singleChar = 'a';
+// CHECK: i8 -127
diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c
--- a/clang/test/Driver/cl-options.c
+++ b/clang/test/Driver/cl-options.c
@@ -209,10 +209,10 @@
 // RUN: %clang_cl /source-charset:utf-16 -### -- %s 2>&1 | FileCheck -check-prefix=source-charset-utf-16 %s
 // source-charset-utf-16: invalid value 'utf-16' in '/source-charset:utf-16'
 
-// /execution-charset: should warn on everything except UTF-8.
-// RUN: %clang_cl /execution-charset:utf-16 -### -- %s 2>&1 | FileCheck -check-prefix=execution-charset-utf-16 %s
-// execution-charset-utf-16: invalid value 'utf-16' in '/execution-charset:utf-16'
-//
+// /execution-charset: should not warn on character sets.
+// RUN: %clang_cl /execution-charset:iso8859-1 -### -- %s 2>&1 | FileCheck -check-prefix=execution-charset-iso8859-1 %s
+// execution-charset-iso8859-1-NOT: invalid value 'iso8859-1' in '/execution-charset:iso8859-1'
+
 // RUN: %clang_cl /Umymacro -### -- %s 2>&1 | FileCheck -check-prefix=U %s
 // RUN: %clang_cl /U mymacro -### -- %s 2>&1 | FileCheck -check-prefix=U %s
 // U: "-U" "mymacro"
diff --git a/clang/test/Driver/clang_f_opts.c b/clang/test/Driver/clang_f_opts.c
--- a/clang/test/Driver/clang_f_opts.c
+++ b/clang/test/Driver/clang_f_opts.c
@@ -209,8 +209,9 @@
 // RUN: %clang -### -S -finput-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-CHARSET %s
 // CHECK-INVALID-CHARSET: error: invalid value 'iso-8859-1' in '-finput-charset=iso-8859-1'
 
-// RUN: %clang -### -S -fexec-charset=iso-8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-INVALID-INPUT-CHARSET %s
-// CHECK-INVALID-INPUT-CHARSET: error: invalid value 'iso-8859-1' in '-fexec-charset=iso-8859-1'
+// RUN: %clang -### -S -fexec-charset=iso8859-1 -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-VALID-EXEC-CHARSET %s
+// CHECK-VALID-EXEC-CHARSET-NOT: error: invalid value 'iso8859-1' in '-fexec-charset=iso8859-1'
+// CHECK-VALID-EXEC-CHARSET: "-fexec-charset" "iso8859-1"
 
 // Test that we don't error on these.
 // RUN: %clang -### -S -Werror \
diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h
--- a/llvm/include/llvm/ADT/Triple.h
+++ b/llvm/include/llvm/ADT/Triple.h
@@ -390,6 +390,9 @@
   /// if the environment component is present).
   StringRef getOSAndEnvironmentName() const;
 
+  /// getSystemCharset - Get the system charset of the triple.
+  StringRef getSystemCharset() const;
+
   /// @}
   /// @name Convenience Predicates
   /// @{
diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -1023,6 +1023,12 @@
   return Tmp.split('-').second;                      // Strip second component
 }
 
+// The system charset is IBM-1047 when the OS is z/OS and UTF-8 otherwise.
+StringRef Triple::getSystemCharset() const {
+  const bool IsZOS = getOS() == llvm::Triple::ZOS;
+  return IsZOS ? "IBM-1047" : "UTF-8";
+}
+
 static unsigned EatNumber(StringRef &Str) {
   assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number");
   unsigned Result = 0;