Index: include/llvm/CodeGen/AsmPrinter.h =================================================================== --- include/llvm/CodeGen/AsmPrinter.h +++ include/llvm/CodeGen/AsmPrinter.h @@ -458,6 +458,10 @@ void EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) const; + /// Emit something like ".uleb128 Hi-Lo". + void EmitLabelDifferenceAsULEB128(const MCSymbol *Hi, + const MCSymbol *Lo) const; + /// Emit something like ".long Label+Offset" where the size in bytes of the /// directive is specified by Size and Label specifies the label. This /// implicitly uses .set if it is available. Index: include/llvm/MC/MCObjectStreamer.h =================================================================== --- include/llvm/MC/MCObjectStreamer.h +++ include/llvm/MC/MCObjectStreamer.h @@ -179,6 +179,9 @@ void emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) override; + void emitAbsoluteSymbolDiffAsULEB128(const MCSymbol *Hi, + const MCSymbol *Lo) override; + bool mayHaveInstructions(MCSection &Sec) const override; }; Index: include/llvm/MC/MCStreamer.h =================================================================== --- include/llvm/MC/MCStreamer.h +++ include/llvm/MC/MCStreamer.h @@ -828,6 +828,10 @@ virtual void emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size); + /// Emit the absolute difference between two symbols encoded with ULEB128. 
+ virtual void emitAbsoluteSymbolDiffAsULEB128(const MCSymbol *Hi, + const MCSymbol *Lo); + virtual MCSymbol *getDwarfLineTableSymbol(unsigned CUID); virtual void EmitCFISections(bool EH, bool Debug); void EmitCFIStartProc(bool IsSimple); Index: lib/CodeGen/AsmPrinter/ARMException.cpp =================================================================== --- lib/CodeGen/AsmPrinter/ARMException.cpp +++ lib/CodeGen/AsmPrinter/ARMException.cpp @@ -91,7 +91,7 @@ ATS.emitFnEnd(); } -void ARMException::emitTypeInfos(unsigned TTypeEncoding) { +void ARMException::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { const MachineFunction *MF = Asm->MF; const std::vector &TypeInfos = MF->getTypeInfos(); const std::vector &FilterIds = MF->getFilterIds(); @@ -112,6 +112,9 @@ Asm->EmitTTypeReference(GV, TTypeEncoding); } + if (TTBaseLabel) + Asm->OutStreamer->EmitLabel(TTBaseLabel); + // Emit the Exception Specifications. if (VerboseAsm && !FilterIds.empty()) { Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); Index: lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp =================================================================== --- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -59,6 +59,12 @@ OutStreamer->EmitULEB128IntValue(Value); } +/// Emit something like ".uleb128 Hi-Lo". 
+void AsmPrinter::EmitLabelDifferenceAsULEB128(const MCSymbol *Hi, + const MCSymbol *Lo) const { + OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo); +} + static const char *DecodeDWARFEncoding(unsigned Encoding) { switch (Encoding) { case dwarf::DW_EH_PE_absptr: @@ -67,6 +73,10 @@ return "omit"; case dwarf::DW_EH_PE_pcrel: return "pcrel"; + case dwarf::DW_EH_PE_uleb128: + return "uleb128"; + case dwarf::DW_EH_PE_sleb128: + return "sleb128"; case dwarf::DW_EH_PE_udata4: return "udata4"; case dwarf::DW_EH_PE_udata8: Index: lib/CodeGen/AsmPrinter/DwarfException.h =================================================================== --- lib/CodeGen/AsmPrinter/DwarfException.h +++ lib/CodeGen/AsmPrinter/DwarfException.h @@ -70,7 +70,7 @@ }; class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase { - void emitTypeInfos(unsigned TTypeEncoding) override; + void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) override; ARMTargetStreamer &getTargetStreamer(); public: Index: lib/CodeGen/AsmPrinter/EHStreamer.h =================================================================== --- lib/CodeGen/AsmPrinter/EHStreamer.h +++ lib/CodeGen/AsmPrinter/EHStreamer.h @@ -110,7 +110,7 @@ /// catches in the function. This tables is reversed indexed base 1. void emitExceptionTable(); - virtual void emitTypeInfos(unsigned TTypeEncoding); + virtual void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel); // Helpers for identifying what kind of clause an EH typeid or selector // corresponds to. 
Negative selectors are for filter clauses, the zero Index: lib/CodeGen/AsmPrinter/EHStreamer.cpp =================================================================== --- lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -29,6 +29,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/LEB128.h" #include #include @@ -37,6 +38,12 @@ using namespace llvm; +static cl::opt + LayoutEHTableInAssembler("layout-eh-table-in-assembler", cl::Hidden, + cl::init(true), + cl::desc("Finalize EH table layout in the " + "assembler; use LEB128 directives")); + EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} EHStreamer::~EHStreamer() = default; @@ -378,25 +385,38 @@ // Final tallies. - // Call sites. bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; - bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true; - - unsigned CallSiteTableLength; - if (IsSJLJ) - CallSiteTableLength = 0; - else { - unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4 - unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4 - unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4 - CallSiteTableLength = - CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize); + unsigned CallSiteEncoding = + (!IsSJLJ && LayoutEHTableInAssembler) ? dwarf::DW_EH_PE_uleb128 : + dwarf::DW_EH_PE_udata4; + bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty(); + + // Call sites. 
+ unsigned CallSiteTableLength = 0; + unsigned CallSiteTableLengthSize = 0; + if (!LayoutEHTableInAssembler) { + if (!IsSJLJ) { + unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4 + unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4 + unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4 + CallSiteTableLength = + CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize); + } + + for (unsigned i = 0, e = CallSites.size(); i < e; ++i) { + CallSiteTableLength += getULEB128Size(CallSites[i].Action); + if (IsSJLJ) + CallSiteTableLength += getULEB128Size(i); + } + + CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength); } - for (unsigned i = 0, e = CallSites.size(); i < e; ++i) { - CallSiteTableLength += getULEB128Size(CallSites[i].Action); - if (IsSJLJ) - CallSiteTableLength += getULEB128Size(i); + MCSymbol *CstBeginLabel = nullptr; + MCSymbol *CstEndLabel = nullptr; + if (LayoutEHTableInAssembler) { + CstBeginLabel = Asm->createTempSymbol("cst_begin"); + CstEndLabel = Asm->createTempSymbol("cst_end"); } // Type infos. @@ -460,64 +480,100 @@ Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); Asm->EmitEncodingByte(TTypeEncoding, "@TType"); - // The type infos need to be aligned. GCC does this by inserting padding just - // before the type infos. However, this changes the size of the exception - // table, so you need to take this into account when you output the exception - // table size. However, the size is output using a variable length encoding. - // So by increasing the size by inserting padding, you may increase the number - // of bytes used for writing the size. If it increases, say by one byte, then - // you now need to output one less byte of padding to get the type infos - // aligned. However this decreases the size of the exception table. This - // changes the value you have to output for the exception table size. Due to - // the variable length encoding, the number of bytes used for writing the - // length may decrease. 
If so, you then have to increase the amount of - // padding. And so on. If you look carefully at the GCC code you will see that - // it indeed does this in a loop, going on and on until the values stabilize. - // We chose another solution: don't output padding inside the table like GCC - // does, instead output it before the table. - unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; - unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength); - unsigned TTypeBaseOffset = - sizeof(int8_t) + // Call site format - CallSiteTableLengthSize + // Call site table length size - CallSiteTableLength + // Call site table length - SizeActions + // Actions size - SizeTypes; - unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset); - unsigned TotalSize = - sizeof(int8_t) + // LPStart format - sizeof(int8_t) + // TType format - (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size - TTypeBaseOffset; // TType base offset - unsigned PadBytes = (4 - TotalSize) & 3; - + MCSymbol *TTBaseLabel = nullptr; if (HaveTTData) { - // Account for any extra padding that will be added to the call site table - // length. - Asm->EmitPaddedULEB128(TTypeBaseOffset, TTypeBaseOffsetSize + PadBytes, - "@TType base offset"); - PadBytes = 0; + // The type table needs to be aligned. GCC aligns it using padding just + // before the type infos. Adding or removing this padding changes the + // TTBase offset, which is encoded using a LEB128 variable-length + // encoding: + // + // .balign 4 + // GCC_except_table: + // .byte 0xff // LPStart (omitted) + // .byte 0x__ // type table encoding + // .uleb128 (.TTBase - .TTBaseRef) + // .TTBaseRef: + // ... // call site and actions tables + // .balign 4 // type info padding here + // .long _ZTI + // ... + // .long _ZTI + // .TTBase: + // + // GCC can be configured to output LEB128 directives to an assembler, or it + // can be configured to encode LEB128 values itself, emitting the + // individual byte values of an LEB128. 
Emitting byte values requires + // knowing the sizes of everything in the EH table, though, which requires + // using an inefficient 32-bit encoding for code offsets. If a compiler + // instead delegates EH table layout to the assembler, then it can use an + // LEB128 encoding for code offsets, which can cut the EH table size in + // half. + // + // Unfortunately, there is a dependency cycle between the size of the + // TTBase offset LEB128 and the size of the padding before the type infos. + // (Changing the size of the padding changes the size of the LEB128, which + // changes the size of the padding, and so forth.) There are typically + // two bytes before the TTBase LEB128, so there is a potential for an + // infinite loop in the toolchain when there are between 16381 and 16383 + // non-padding bytes between TTBaseRef and TTBase. + // + // The GNU and LLVM assemblers have been susceptible to this infinite loop: + // - https://sourceware.org/bugzilla/show_bug.cgi?id=4029 + // - LLVM PR35809 + // + // LLVM can also be configured to emit LEB128 directives or encode them + // itself: + // - If LayoutEHTableInAssembler is set, LLVM outputs GCC-like assembly, + // and the assembler resolves the cycle using padding in the TTBase + // LEB128 and/or before the type infos. + // - If !LayoutEHTableInAssembler, LLVM breaks the cycle by instead + // outputting padding in the TTBase LEB128 encoding. 
+ + if (LayoutEHTableInAssembler) { + MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref"); + TTBaseLabel = Asm->createTempSymbol("ttbase"); + Asm->EmitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel); + Asm->OutStreamer->EmitLabel(TTBaseRefLabel); + } else { + unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; + unsigned TTypeBaseOffset = + sizeof(int8_t) + // Call site format + CallSiteTableLengthSize + // Call site table length size + CallSiteTableLength + // Call site table length + SizeActions + // Actions size + SizeTypes; + unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset); + unsigned TotalSize = + sizeof(int8_t) + // LPStart format + sizeof(int8_t) + // TType format + (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size + TTypeBaseOffset; // TType base offset + unsigned PadBytes = (4 - TotalSize) & 3; + + Asm->EmitPaddedULEB128(TTypeBaseOffset, TTypeBaseOffsetSize + PadBytes, + "@TType base offset"); + } } bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + // Emit the landing pad call site table. + Asm->EmitEncodingByte(CallSiteEncoding, "Call site"); + if (LayoutEHTableInAssembler) { + Asm->EmitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel); + Asm->OutStreamer->EmitLabel(CstBeginLabel); + } else { + Asm->EmitULEB128(CallSiteTableLength, "Call site table length"); + } + // SjLj Exception handling if (IsSJLJ) { - Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); - - // Add extra padding if it wasn't added to the TType base offset. - Asm->EmitPaddedULEB128(CallSiteTableLength, - CallSiteTableLengthSize + PadBytes, - "Call site table length"); - - // Emit the landing pad site information. unsigned idx = 0; for (SmallVectorImpl::const_iterator I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) { const CallSiteEntry &S = *I; - // Offset of the landing pad, counted in 16-byte bundles relative to the - // @LPStart address. + // Index of the call site entry. 
if (VerboseAsm) { Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<"); Asm->OutStreamer->AddComment(" On exception at call site "+Twine(idx)); @@ -557,13 +613,18 @@ // A missing entry in the call-site table indicates that a call is not // supposed to throw. - // Emit the landing pad call site table. - Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); - - // Add extra padding if it wasn't added to the TType base offset. - Asm->EmitPaddedULEB128(CallSiteTableLength, - CallSiteTableLengthSize + PadBytes, - "Call site table length"); + const auto EmitLabelDifference = [&](MCSymbol *Hi, MCSymbol *Lo) { + if (CallSiteEncoding == dwarf::DW_EH_PE_uleb128) + Asm->EmitLabelDifferenceAsULEB128(Hi, Lo); + else + Asm->EmitLabelDifference(Hi, Lo, 4/*size*/); + }; + const auto EmitNullLandingPad = [&]() { + if (CallSiteEncoding == dwarf::DW_EH_PE_uleb128) + Asm->EmitULEB128(0); + else + Asm->OutStreamer->EmitIntValue(0, 4/*size*/); + }; unsigned Entry = 0; for (SmallVectorImpl::const_iterator @@ -579,29 +640,26 @@ if (!EndLabel) EndLabel = Asm->getFunctionEnd(); - // Offset of the call site relative to the previous call site, counted in - // number of 16-byte bundles. The first call site is counted relative to - // the start of the procedure fragment. + // Offset of the call site relative to the start of the procedure. if (VerboseAsm) Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<"); - Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); + EmitLabelDifference(BeginLabel, EHFuncBeginSym); if (VerboseAsm) Asm->OutStreamer->AddComment(Twine(" Call between ") + BeginLabel->getName() + " and " + EndLabel->getName()); - Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); + EmitLabelDifference(EndLabel, BeginLabel); - // Offset of the landing pad, counted in 16-byte bundles relative to the - // @LPStart address. + // Offset of the landing pad relative to the start of the procedure. 
if (!S.LPad) { if (VerboseAsm) Asm->OutStreamer->AddComment(" has no landing pad"); - Asm->OutStreamer->EmitIntValue(0, 4/*size*/); + EmitNullLandingPad(); } else { if (VerboseAsm) Asm->OutStreamer->AddComment(Twine(" jumps to ") + S.LPad->LandingPadLabel->getName()); - Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4); + EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym); } // Offset of the first associated action record, relative to the start of @@ -617,6 +675,8 @@ Asm->EmitULEB128(S.Action); } } + if (LayoutEHTableInAssembler) + Asm->OutStreamer->EmitLabel(CstEndLabel); // Emit the Action Table. int Entry = 0; @@ -660,12 +720,15 @@ Asm->EmitSLEB128(Action.NextAction); } - emitTypeInfos(TTypeEncoding); + if (LayoutEHTableInAssembler) + Asm->EmitAlignment(2); + + emitTypeInfos(TTypeEncoding, TTBaseLabel); Asm->EmitAlignment(2); } -void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { +void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { const MachineFunction *MF = Asm->MF; const std::vector &TypeInfos = MF->getTypeInfos(); const std::vector &FilterIds = MF->getFilterIds(); @@ -687,6 +750,9 @@ Asm->EmitTTypeReference(GV, TTypeEncoding); } + if (TTBaseLabel) + Asm->OutStreamer->EmitLabel(TTBaseLabel); + // Emit the Exception Specifications. 
if (VerboseAsm && !FilterIds.empty()) { Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); Index: lib/MC/MCAsmStreamer.cpp =================================================================== --- lib/MC/MCAsmStreamer.cpp +++ lib/MC/MCAsmStreamer.cpp @@ -910,7 +910,7 @@ EmitULEB128IntValue(IntValue); return; } - OS << ".uleb128 "; + OS << "\t.uleb128 "; Value->print(OS, MAI); EmitEOL(); } @@ -921,7 +921,7 @@ EmitSLEB128IntValue(IntValue); return; } - OS << ".sleb128 "; + OS << "\t.sleb128 "; Value->print(OS, MAI); EmitEOL(); } Index: lib/MC/MCObjectStreamer.cpp =================================================================== --- lib/MC/MCObjectStreamer.cpp +++ lib/MC/MCObjectStreamer.cpp @@ -51,17 +51,36 @@ PendingLabels.clear(); } +static Optional absoluteSymbolDiff(const MCSymbol *Hi, + const MCSymbol *Lo) { + if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment() || + Hi->isVariable() || Lo->isVariable()) + return None; + + return Hi->getOffset() - Lo->getOffset(); +} + void MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) { - // If not assigned to the same (valid) fragment, fallback. 
- if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment() || - Hi->isVariable() || Lo->isVariable()) { + Optional Diff = absoluteSymbolDiff(Hi, Lo); + if (!Diff.hasValue()) { MCStreamer::emitAbsoluteSymbolDiff(Hi, Lo, Size); return; } - EmitIntValue(Hi->getOffset() - Lo->getOffset(), Size); + EmitIntValue(*Diff, Size); +} + +void MCObjectStreamer::emitAbsoluteSymbolDiffAsULEB128(const MCSymbol *Hi, + const MCSymbol *Lo) { + Optional Diff = absoluteSymbolDiff(Hi, Lo); + if (!Diff.hasValue()) { + MCStreamer::emitAbsoluteSymbolDiffAsULEB128(Hi, Lo); + return; + } + + EmitULEB128IntValue(*Diff); } void MCObjectStreamer::reset() { Index: lib/MC/MCStreamer.cpp =================================================================== --- lib/MC/MCStreamer.cpp +++ lib/MC/MCStreamer.cpp @@ -902,6 +902,16 @@ EmitSymbolValue(SetLabel, Size); } +void MCStreamer::emitAbsoluteSymbolDiffAsULEB128(const MCSymbol *Hi, + const MCSymbol *Lo) { + // Get the Hi-Lo expression. + const MCExpr *Diff = + MCBinaryExpr::createSub(MCSymbolRefExpr::create(Hi, Context), + MCSymbolRefExpr::create(Lo, Context), Context); + + EmitULEB128Value(Diff); +} + void MCStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {} void MCStreamer::EmitThumbFunc(MCSymbol *Func) {} void MCStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}