diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -110,6 +110,9 @@ ///< Set when -fxray-ignore-loops is enabled. CODEGENOPT(XRayIgnoreLoops , 1, 0) +///< Set with -fno-xray-function-index to omit the index section. +CODEGENOPT(XRayOmitFunctionIndex , 1, 0) + ///< Set the minimum number of instructions in a function to determine selective ///< XRay instrumentation. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1281,6 +1281,12 @@ defm xray_ignore_loops : OptInFFlag<"xray-ignore-loops", "Don't instrument functions with loops unless they also meet the minimum function size">; +def fxray_function_index : Flag<["-"], "fxray-function-index">, + Group<f_Group>, Flags<[CC1Option]>; +def fno_xray_function_index : Flag<["-"], "fno-xray-function-index">, + Group<f_Group>, Flags<[CC1Option]>, + HelpText<"Omit the xray index section to reduce binary size at the expense of single-function patching performance">; + def fxray_link_deps : Flag<["-"], "fxray-link-deps">, Group<f_Group>, Flags<[CC1Option]>, HelpText<"Tells clang to add the link dependencies for XRay.">; diff --git a/clang/include/clang/Driver/XRayArgs.h b/clang/include/clang/Driver/XRayArgs.h --- a/clang/include/clang/Driver/XRayArgs.h +++ b/clang/include/clang/Driver/XRayArgs.h @@ -31,6 +31,7 @@ bool XRayAlwaysEmitTypedEvents = false; bool XRayRT = true; bool XRayIgnoreLoops = false; + bool XRayOmitFunctionIndex = false; public: /// Parses the XRay arguments from an argument list. 
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -516,6 +516,7 @@ Options.EmitAddrsig = CodeGenOpts.Addrsig; Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection; Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo; + Options.XRayOmitFunctionIndex = CodeGenOpts.XRayOmitFunctionIndex; Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp --- a/clang/lib/Driver/XRayArgs.cpp +++ b/clang/lib/Driver/XRayArgs.cpp @@ -105,6 +105,10 @@ options::OPT_fno_xray_ignore_loops, false)) XRayIgnoreLoops = true; + if (!Args.hasFlag(options::OPT_fxray_function_index, + options::OPT_fno_xray_function_index, true)) + XRayOmitFunctionIndex = true; + auto Bundles = Args.getAllArgValues(options::OPT_fxray_instrumentation_bundle); if (Bundles.empty()) @@ -204,6 +208,9 @@ if (XRayIgnoreLoops) CmdArgs.push_back("-fxray-ignore-loops"); + if (XRayOmitFunctionIndex) + CmdArgs.push_back("-fno-xray-function-index"); + CmdArgs.push_back(Args.MakeArgString(Twine(XRayInstructionThresholdOption) + Twine(InstructionThreshold))); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1081,6 +1081,9 @@ Opts.XRayInstructionThreshold = getLastArgIntValue(Args, OPT_fxray_instruction_threshold_EQ, 200, Diags); Opts.XRayIgnoreLoops = Args.hasArg(OPT_fxray_ignore_loops); + // Both spellings are cc1 options, so honour whichever comes last; the index is emitted by default. + Opts.XRayOmitFunctionIndex = !Args.hasFlag( + OPT_fxray_function_index, OPT_fno_xray_function_index, true); auto XRayInstrBundles = Args.getAllArgValues(OPT_fxray_instrumentation_bundle); diff --git a/clang/test/Driver/XRay/xray-function-index-flags.cpp b/clang/test/Driver/XRay/xray-function-index-flags.cpp new file mode 100644 --- /dev/null +++ 
b/clang/test/Driver/XRay/xray-function-index-flags.cpp @@ -0,0 +1,20 @@ +// This test ensures that when we invoke the clang compiler, that the -cc1 +// options respect the -fno-xray-function-index flag we provide in the +// invocation. The default should be to *include* the function index. +// +// RUN: %clang -fxray-instrument -fxray-function-index -target x86_64-linux- -### \ +// RUN: -x c++ -std=c++11 -emit-llvm -c -o - %s 2>&1 \ +// RUN: | FileCheck %s +// CHECK-NOT: -fno-xray-function-index +// +// RUN: %clang -fxray-instrument -target x86_64-linux- -### \ +// RUN: -x c++ -std=c++11 -emit-llvm -c -o - %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix CHECK-DEFAULT +// CHECK-DEFAULT-NOT: -fno-xray-function-index +// +// RUN: %clang -fxray-instrument -fno-xray-function-index -target x86_64-linux- -### \ +// RUN: -x c++ -std=c++11 -emit-llvm -c -o - %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix CHECK-DISABLED +// CHECK-DISABLED: -fno-xray-function-index +// +// REQUIRES: x86_64 || x86_64h diff --git a/compiler-rt/lib/xray/xray_init.cpp b/compiler-rt/lib/xray/xray_init.cpp --- a/compiler-rt/lib/xray/xray_init.cpp +++ b/compiler-rt/lib/xray/xray_init.cpp @@ -84,8 +84,24 @@ SpinMutexLock Guard(&XRayInstrMapMutex); XRayInstrMap.Sleds = __start_xray_instr_map; XRayInstrMap.Entries = __stop_xray_instr_map - __start_xray_instr_map; - XRayInstrMap.SledsIndex = __start_xray_fn_idx; - XRayInstrMap.Functions = __stop_xray_fn_idx - __start_xray_fn_idx; + if (__start_xray_fn_idx != nullptr) { + XRayInstrMap.SledsIndex = __start_xray_fn_idx; + XRayInstrMap.Functions = __stop_xray_fn_idx - __start_xray_fn_idx; + } else { + size_t CountFunctions = 0; + uint64_t LastFnAddr = 0; + + for (std::size_t I = 0; I < XRayInstrMap.Entries; I++) { + const auto &Sled = XRayInstrMap.Sleds[I]; + const auto Function = Sled.function(); + if (Function != LastFnAddr) { + CountFunctions++; + LastFnAddr = Function; + } + } + + XRayInstrMap.Functions = CountFunctions; + } } 
atomic_store(&XRayInitialized, true, memory_order_release); diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp --- a/compiler-rt/lib/xray/xray_interface.cpp +++ b/compiler-rt/lib/xray/xray_interface.cpp @@ -175,6 +175,33 @@ return Success; } +// Fallback used when InstrMap.SledsIndex is null: linearly scans InstrMap.Sleds +// for the FuncId-th (1-based) run of sleds sharing one function address and +// returns its [Begin, End) range, or {nullptr, nullptr} (empty) if not found. +const XRayFunctionSledIndex +findFunctionSleds(int32_t FuncId, + const XRaySledMap &InstrMap) XRAY_NEVER_INSTRUMENT { + int32_t CurFn = 0; + uint64_t LastFnAddr = 0; + XRayFunctionSledIndex Index = {nullptr, nullptr}; + + for (std::size_t I = 0; I < InstrMap.Entries && CurFn <= FuncId; I++) { + const auto &Sled = InstrMap.Sleds[I]; + const auto Function = Sled.function(); + if (Function != LastFnAddr) { + CurFn++; + LastFnAddr = Function; + } + + if (CurFn == FuncId) { + if (Index.Begin == nullptr) + Index.Begin = &Sled; + Index.End = &Sled + 1; + } + } + return Index; +} + XRayPatchingStatus patchFunction(int32_t FuncId, bool Enable) XRAY_NEVER_INSTRUMENT { if (!atomic_load(&XRayInitialized, @@ -205,10 +232,10 @@ } // Now we patch ths sleds for this specific function. - auto SledRange = InstrMap.SledsIndex[FuncId - 1]; + auto SledRange = InstrMap.SledsIndex ? InstrMap.SledsIndex[FuncId - 1] + : findFunctionSleds(FuncId, InstrMap); auto *f = SledRange.Begin; auto *e = SledRange.End; - bool SucceedOnce = false; while (f != e) SucceedOnce |= patchSled(*f++, Enable, FuncId); @@ -335,7 +362,8 @@ // Here we compute the minumum sled and maximum sled associated with a // particular function ID. - auto SledRange = InstrMap.SledsIndex[FuncId - 1]; + auto SledRange = InstrMap.SledsIndex ? 
InstrMap.SledsIndex[FuncId - 1] + : findFunctionSleds(FuncId, InstrMap); auto *f = SledRange.Begin; auto *e = SledRange.End; auto *MinSled = f; @@ -463,10 +491,18 @@ int __xray_remove_handler_arg1() { return __xray_set_handler_arg1(nullptr); } uintptr_t __xray_function_address(int32_t FuncId) XRAY_NEVER_INSTRUMENT { - SpinMutexLock Guard(&XRayInstrMapMutex); - if (FuncId <= 0 || static_cast<size_t>(FuncId) > XRayInstrMap.Functions) + XRaySledMap InstrMap; + { + SpinMutexLock Guard(&XRayInstrMapMutex); + InstrMap = XRayInstrMap; + } + + if (FuncId <= 0 || static_cast<size_t>(FuncId) > InstrMap.Functions) return 0; - return XRayInstrMap.SledsIndex[FuncId - 1].Begin->function() + const XRaySledEntry *Sled = InstrMap.SledsIndex + ? InstrMap.SledsIndex[FuncId - 1].Begin + : findFunctionSleds(FuncId, InstrMap).Begin; + return Sled->function() // On PPC, function entries are always aligned to 16 bytes. The beginning of a // sled might be a local entry, which is always +8 based on the global entry. // Always return the global entry. 
diff --git a/compiler-rt/test/xray/TestCases/Posix/coverage-sample.cpp b/compiler-rt/test/xray/TestCases/Posix/coverage-sample.cpp --- a/compiler-rt/test/xray/TestCases/Posix/coverage-sample.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/coverage-sample.cpp @@ -2,6 +2,8 @@ // // RUN: %clangxx_xray -std=c++11 %s -o %t // RUN: XRAY_OPTIONS="patch_premain=false" %run %t | FileCheck %s +// RUN: %clangxx_xray -fno-xray-function-index -std=c++11 %s -o %t +// RUN: XRAY_OPTIONS="patch_premain=false" %run %t | FileCheck %s // UNSUPPORTED: target-is-mips64,target-is-mips64el diff --git a/compiler-rt/test/xray/TestCases/Posix/func-id-utils.cpp b/compiler-rt/test/xray/TestCases/Posix/func-id-utils.cpp --- a/compiler-rt/test/xray/TestCases/Posix/func-id-utils.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/func-id-utils.cpp @@ -3,6 +3,8 @@ // // RUN: %clangxx_xray -std=c++11 %s -o %t // RUN: XRAY_OPTIONS="patch_premain=false" %run %t +// RUN: %clangxx_xray -fno-xray-function-index -std=c++11 %s -o %t +// RUN: XRAY_OPTIONS="patch_premain=false" %run %t // UNSUPPORTED: target-is-mips64,target-is-mips64el diff --git a/compiler-rt/test/xray/TestCases/Posix/patching-unpatching.cpp b/compiler-rt/test/xray/TestCases/Posix/patching-unpatching.cpp --- a/compiler-rt/test/xray/TestCases/Posix/patching-unpatching.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/patching-unpatching.cpp @@ -3,6 +3,8 @@ // // RUN: %clangxx_xray -fxray-instrument -std=c++11 %s -o %t // RUN: XRAY_OPTIONS="patch_premain=false" %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_xray -fxray-instrument -fno-xray-function-index -std=c++11 %s -o %t +// RUN: XRAY_OPTIONS="patch_premain=false" %run %t 2>&1 | FileCheck %s // UNSUPPORTED: target-is-mips64,target-is-mips64el diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -118,6 +118,8 @@ bool getForceDwarfFrameSection(); +bool 
getXRayOmitFunctionIndex(); + /// Create this object with static storage to register codegen-related command /// line options. struct RegisterCodeGenFlags { diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -128,6 +128,7 @@ SupportsDefaultOutlining(false), EmitAddrsig(false), EmitCallSiteInfo(false), SupportsDebugEntryValues(false), EnableDebugEntryValues(false), ForceDwarfFrameSection(false), + XRayOmitFunctionIndex(false), FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {} /// PrintMachineCode - This flag is enabled when the -print-machineinstrs @@ -293,6 +294,9 @@ /// Emit DWARF debug frame section. unsigned ForceDwarfFrameSection : 1; + /// Omit the XRay function index section (xray_fn_idx). + unsigned XRayOmitFunctionIndex : 1; + /// FloatABIType - This setting is set by -float-abi=xxx option is specfied /// on the command line. This setting may either be Default, Soft, or Hard. /// Default selects the target's default behavior. 
Soft selects the ABI for diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -3235,14 +3235,17 @@ InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, Flags, 0, GroupName, MCSection::NonUniqueID, LinkedToSym); - FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, - Flags | ELF::SHF_WRITE, 0, GroupName, - MCSection::NonUniqueID, LinkedToSym); + + if (!TM.Options.XRayOmitFunctionIndex) + FnSledIndex = OutContext.getELFSection( + "xray_fn_idx", ELF::SHT_PROGBITS, Flags | ELF::SHF_WRITE, 0, + GroupName, MCSection::NonUniqueID, LinkedToSym); } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) { InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, SectionKind::getReadOnlyWithRel()); - FnSledIndex = OutContext.getMachOSection("__DATA", "xray_fn_idx", 0, - SectionKind::getReadOnlyWithRel()); + if (!TM.Options.XRayOmitFunctionIndex) + FnSledIndex = OutContext.getMachOSection( + "__DATA", "xray_fn_idx", 0, SectionKind::getReadOnlyWithRel()); } else { llvm_unreachable("Unsupported target"); } @@ -3285,11 +3288,13 @@ // that bound the instrumentation map as the range for a specific function. // Each entry here will be 2 * word size aligned, as we're writing down two // pointers. This should work for both 32-bit and 64-bit platforms. 
- OutStreamer->SwitchSection(FnSledIndex); - OutStreamer->emitCodeAlignment(2 * WordSizeBytes); - OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false); - OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false); - OutStreamer->SwitchSection(PrevSection); + if (FnSledIndex) { + OutStreamer->SwitchSection(FnSledIndex); + OutStreamer->emitCodeAlignment(2 * WordSizeBytes); + OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false); + OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false); + OutStreamer->SwitchSection(PrevSection); + } Sleds.clear(); } diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -86,6 +86,7 @@ CGOPT(bool, EmitCallSiteInfo) CGOPT(bool, EnableDebugEntryValues) CGOPT(bool, ForceDwarfFrameSection) +CGOPT(bool, XRayOmitFunctionIndex) codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { #define CGBINDOPT(NAME) \ @@ -404,6 +405,11 @@ cl::desc("Always emit a debug frame section."), cl::init(false)); CGBINDOPT(ForceDwarfFrameSection); + static cl::opt<bool> XRayOmitFunctionIndex( + "no-xray-index", cl::desc("Don't emit xray_fn_idx section"), + cl::init(false)); + CGBINDOPT(XRayOmitFunctionIndex); + #undef CGBINDOPT mc::RegisterMCTargetOptionsFlags(); @@ -470,6 +476,7 @@ Options.EmitCallSiteInfo = getEmitCallSiteInfo(); Options.EnableDebugEntryValues = getEnableDebugEntryValues(); Options.ForceDwarfFrameSection = getForceDwarfFrameSection(); + Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex(); Options.MCOptions = mc::InitMCTargetOptionsFromFlags(); diff --git a/llvm/test/CodeGen/AArch64/xray-omit-function-index.ll b/llvm/test/CodeGen/AArch64/xray-omit-function-index.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/xray-omit-function-index.ll @@ -0,0 +1,33 @@ +; RUN: llc -filetype=asm -no-xray-index -o - -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +define i32 @foo() nounwind 
noinline uwtable "function-instrument"="xray-always" { +; CHECK-LABEL: Lxray_sled_0: +; CHECK-NEXT: b #32 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-LABEL: Ltmp0: + ret i32 0 +; CHECK-LABEL: Lxray_sled_1: +; CHECK-NEXT: b #32 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-LABEL: Ltmp1: +; CHECK-NEXT: ret +} +; CHECK-LABEL: xray_instr_map +; CHECK-LABEL: Lxray_sleds_start0 +; CHECK: .xword .Lxray_sled_0 +; CHECK: .xword .Lxray_sled_1 +; CHECK-LABEL: Lxray_sleds_end0 + +; CHECK-NOT: xray_fn_idx \ No newline at end of file