diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -196,6 +196,16 @@ /// doesn't support this, it can be set to null. Defaults to "\t.asciz\t" const char *AscizDirective; + /// This directive accepts a comma-separated list of bytes for emission as a + /// string of bytes. For targets that do not support this, it shall be set to + /// null. Defaults to null. + const char *ByteListDirective = nullptr; + + /// Format string in the style of printf for character literals in the + /// assembly syntax. Useful for producing strings as byte lists. If a target + /// does not use or support this, it shall be set to null. Defaults to null. + const char *CharacterLiteralFormatString = nullptr; + /// These directives are used to output some unit of integer data to the /// current section. If a data directive is set to null, smaller data /// directives will be used to emit the large sizes. Defaults to "\t.byte\t", @@ -563,6 +573,10 @@ } const char *getAsciiDirective() const { return AsciiDirective; } const char *getAscizDirective() const { return AscizDirective; } + const char *getByteListDirective() const { return ByteListDirective; } + const char *characterLiteralFormatString() const { + return CharacterLiteralFormatString; + } bool getAlignmentIsInBytes() const { return AlignmentIsInBytes; } unsigned getTextAlignFillValue() const { return TextAlignFillValue; } const char *getGlobalDirective() const { return GlobalDirective; } diff --git a/llvm/lib/MC/MCAsmInfoXCOFF.cpp b/llvm/lib/MC/MCAsmInfoXCOFF.cpp --- a/llvm/lib/MC/MCAsmInfoXCOFF.cpp +++ b/llvm/lib/MC/MCAsmInfoXCOFF.cpp @@ -20,6 +20,8 @@ ZeroDirectiveSupportsNonZeroValue = false; AsciiDirective = nullptr; // not supported AscizDirective = nullptr; // not supported + ByteListDirective = "\t.byte\t"; + CharacterLiteralFormatString = "'%c"; // single quote + the character Data64bitsDirective = "\t.llong\t"; 
COMMDirectiveAlignmentIsInBytes = false; LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment; diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -890,6 +890,38 @@ static inline char toOctal(int X) { return (X&7)+'0'; } +static void PrintByteList(StringRef Data, raw_ostream &OS, + const char *CharacterLiteralFormatString) { + assert(!Data.empty() && "Cannot generate an empty list."); + const auto printCharacterInOctal = [&OS](unsigned char C) { + OS << '0'; + OS << toOctal(C >> 6); + OS << toOctal(C >> 3); + OS << toOctal(C >> 0); + }; + const auto printOneCharacter = [&OS, CharacterLiteralFormatString, + printCharacterInOctal](unsigned char C) { + if (isPrint(C)) { + OS << format(CharacterLiteralFormatString, static_cast<char>(C)); + return; + } + printCharacterInOctal(C); + }; + const auto printCharacterList = [Data, &OS](const auto &printOneCharacter) { + const auto BeginPtr = Data.begin(), EndPtr = Data.end(); + for (const unsigned char C : make_range(BeginPtr, EndPtr - 1)) { + printOneCharacter(C); + OS << ','; + } + printOneCharacter(*(EndPtr - 1)); + }; + if (LLVM_LIKELY(CharacterLiteralFormatString)) { + printCharacterList(printOneCharacter); + return; + } + printCharacterList(printCharacterInOctal); +} + static void PrintQuotedString(StringRef Data, raw_ostream &OS) { OS << '"'; @@ -928,33 +960,42 @@ "Cannot emit contents before setting section!"); if (Data.empty()) return; - // If only single byte is provided or no ascii or asciz directives is - // supported, emit as vector of 8bits data. 
- if (Data.size() == 1 || - !(MAI->getAscizDirective() || MAI->getAsciiDirective())) { - if (MCTargetStreamer *TS = getTargetStreamer()) { - TS->emitRawBytes(Data); + const auto emitAsString = [this](StringRef Data) { + // If the data ends with 0 and the target supports .asciz, use it, otherwise + // use .ascii or a byte-list directive + if (MAI->getAscizDirective() && Data.back() == 0) { + OS << MAI->getAscizDirective(); + Data = Data.substr(0, Data.size() - 1); + } else if (LLVM_LIKELY(MAI->getAsciiDirective())) { + OS << MAI->getAsciiDirective(); + } else if (MAI->getByteListDirective()) { + OS << MAI->getByteListDirective(); + PrintByteList(Data, OS, MAI->characterLiteralFormatString()); + EmitEOL(); + return true; } else { - const char *Directive = MAI->getData8bitsDirective(); - for (const unsigned char C : Data.bytes()) { - OS << Directive << (unsigned)C; - EmitEOL(); - } + return false; } + + PrintQuotedString(Data, OS); + EmitEOL(); + return true; + }; + + if (Data.size() != 1 && emitAsString(Data)) return; - } - // If the data ends with 0 and the target supports .asciz, use it, otherwise - // use .ascii - if (MAI->getAscizDirective() && Data.back() == 0) { - OS << MAI->getAscizDirective(); - Data = Data.substr(0, Data.size()-1); - } else { - OS << MAI->getAsciiDirective(); + // Only single byte is provided or no ascii, asciz, or byte-list directives + // are applicable. Emit as vector of individual 8bits data elements. 
+ if (MCTargetStreamer *TS = getTargetStreamer()) { + TS->emitRawBytes(Data); + return; + } + const char *Directive = MAI->getData8bitsDirective(); + for (const unsigned char C : Data.bytes()) { + OS << Directive << (unsigned)C; + EmitEOL(); } - - PrintQuotedString(Data, OS); - EmitEOL(); } void MCAsmStreamer::emitBinaryData(StringRef Data) { diff --git a/llvm/test/CodeGen/PowerPC/aix-bytestring.ll b/llvm/test/CodeGen/PowerPC/aix-bytestring.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-bytestring.ll @@ -0,0 +1,7 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s + +@str = constant [256 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10\11\12\13\14\15\16\17\18\19\1A\1B\1C\1D\1E\1F !\22#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\7F\80\81\82\83\84\85\86\87\88\89\8A\8B\8C\8D\8E\8F\90\91\92\93\94\95\96\97\98\99\9A\9B\9C\9D\9E\9F\A0\A1\A2\A3\A4\A5\A6\A7\A8\A9\AA\AB\AC\AD\AE\AF\B0\B1\B2\B3\B4\B5\B6\B7\B8\B9\BA\BB\BC\BD\BE\BF\C0\C1\C2\C3\C4\C5\C6\C7\C8\C9\CA\CB\CC\CD\CE\CF\D0\D1\D2\D3\D4\D5\D6\D7\D8\D9\DA\DB\DC\DD\DE\DF\E0\E1\E2\E3\E4\E5\E6\E7\E8\E9\EA\EB\EC\ED\EE\EF\F0\F1\F2\F3\F4\F5\F6\F7\F8\F9\FA\FB\FC\FD\FE\FF\00", align 1 + +; CHECK-LABEL:str: +; CHECK-NEXT: .byte 0001,0002,0003,0004,0005,0006,0007,0010,0011,0012,0013,0014,0015,0016,0017,0020,0021,0022,0023,0024,0025,0026,0027,0030,0031,0032,0033,0034,0035,0036,0037,' 
,'!,'",'#,'$,'%,'&,'','(,'),'*,'+,',,'-,'.,'/,'0,'1,'2,'3,'4,'5,'6,'7,'8,'9,':,';,'<,'=,'>,'?,'@,'A,'B,'C,'D,'E,'F,'G,'H,'I,'J,'K,'L,'M,'N,'O,'P,'Q,'R,'S,'T,'U,'V,'W,'X,'Y,'Z,'[,'\,'],'^,'_,'`,'a,'b,'c,'d,'e,'f,'g,'h,'i,'j,'k,'l,'m,'n,'o,'p,'q,'r,'s,'t,'u,'v,'w,'x,'y,'z,'{,'|,'},'~,0177,0200,0201,0202,0203,0204,0205,0206,0207,0210,0211,0212,0213,0214,0215,0216,0217,0220,0221,0222,0223,0224,0225,0226,0227,0230,0231,0232,0233,0234,0235,0236,0237,0240,0241,0242,0243,0244,0245,0246,0247,0250,0251,0252,0253,0254,0255,0256,0257,0260,0261,0262,0263,0264,0265,0266,0267,0270,0271,0272,0273,0274,0275,0276,0277,0300,0301,0302,0303,0304,0305,0306,0307,0310,0311,0312,0313,0314,0315,0316,0317,0320,0321,0322,0323,0324,0325,0326,0327,0330,0331,0332,0333,0334,0335,0336,0337,0340,0341,0342,0343,0344,0345,0346,0347,0350,0351,0352,0353,0354,0355,0356,0357,0360,0361,0362,0363,0364,0365,0366,0367,0370,0371,0372,0373,0374,0375,0376,0377,0000 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-data.ll @@ -74,10 +74,7 @@ ; CHECK: .globl chrarray ; CHECK-NEXT: chrarray: -; CHECK-NEXT: .byte 97 -; CHECK-NEXT: .byte 98 -; CHECK-NEXT: .byte 99 -; CHECK-NEXT: .byte 100 +; CHECK-NEXT: .byte 'a,'b,'c,'d ; CHECK: .globl dblarr ; CHECK-NEXT: .align 3 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-mergeable-str.ll @@ -41,30 +41,9 @@ ; CHECK-NEXT: .long 0 # 0x0 ; CHECK-NEXT: .csect .rodata.str1.1[RO],2 ; CHECK-NEXT: .LstrA: -; CHECK-NEXT: .byte 104 -; CHECK-NEXT: .byte 101 -; CHECK-NEXT: .byte 108 -; CHECK-NEXT: .byte 108 -; CHECK-NEXT: .byte 111 -; CHECK-NEXT: .byte 32 -; CHECK-NEXT: .byte 119 -; CHECK-NEXT: .byte 111 -; CHECK-NEXT: .byte 114 -; CHECK-NEXT: .byte 108 -; CHECK-NEXT: .byte 
100 -; CHECK-NEXT: .byte 33 -; CHECK-NEXT: .byte 10 -; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 'h,'e,'l,'l,'o,' ,'w,'o,'r,'l,'d,'!,0012,0000 ; CHECK-NEXT: .L.str: -; CHECK-NEXT: .byte 97 -; CHECK-NEXT: .byte 98 -; CHECK-NEXT: .byte 99 -; CHECK-NEXT: .byte 100 -; CHECK-NEXT: .byte 101 -; CHECK-NEXT: .byte 102 -; CHECK-NEXT: .byte 103 -; CHECK-NEXT: .byte 104 -; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 'a,'b,'c,'d,'e,'f,'g,'h,0000 ; CHECKOBJ: 00000010 <.rodata.str2.2>: ; CHECKOBJ-NEXT: 10: 01 08 01 10 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll --- a/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-rodata.ll @@ -47,10 +47,7 @@ ; CHECK-NEXT: .llong 0x408c200000000000 ; CHECK-NEXT: .globl const_chrarray ; CHECK-NEXT: const_chrarray: -; CHECK-NEXT: .byte 97 -; CHECK-NEXT: .byte 98 -; CHECK-NEXT: .byte 99 -; CHECK-NEXT: .byte 100 +; CHECK-NEXT: .byte 'a,'b,'c,'d ; CHECK-NEXT: .globl const_dblarr ; CHECK-NEXT: .align 3 ; CHECK-NEXT: const_dblarr: