Index: include/llvm/CodeGen/AsmPrinter.h =================================================================== --- include/llvm/CodeGen/AsmPrinter.h +++ include/llvm/CodeGen/AsmPrinter.h @@ -173,6 +173,10 @@ bool isCFIMoveForDebugging = false; protected: + /// Extension point for XRay to customize the generation of a synthetic + /// reference past the function body. + virtual void emitXRaySyntheticRef(MCSymbol* Symbol); + explicit AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer); public: Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp =================================================================== --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2771,6 +2771,12 @@ Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries } +void AsmPrinter::emitXRaySyntheticRef(MCSymbol* Symbol) { + auto WordSizeBytes = MAI->getCodePointerSize(); + OutStreamer->EmitCodeAlignment(16); + OutStreamer->EmitSymbolValue(Symbol, WordSizeBytes, false); +} + void AsmPrinter::emitXRayTable() { if (Sleds.empty()) return; @@ -2809,8 +2815,7 @@ // this is happening after the last return instruction. auto WordSizeBytes = MAI->getCodePointerSize(); MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true); - OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false); + emitXRaySyntheticRef(IdxRef); // Now we switch to the instrumentation map section. Because this is done // per-function, we are able to create an index entry that will represent the Index: lib/Target/X86/X86AsmPrinter.h =================================================================== --- lib/Target/X86/X86AsmPrinter.h +++ lib/Target/X86/X86AsmPrinter.h @@ -137,6 +137,10 @@ } bool runOnMachineFunction(MachineFunction &F) override; + +protected: + /// X86-specific implementation of XRay synthetic reference generation. 
+ void emitXRaySyntheticRef(MCSymbol* Symbol) override; }; } // end namespace llvm Index: lib/Target/X86/X86AsmPrinter.cpp =================================================================== --- lib/Target/X86/X86AsmPrinter.cpp +++ lib/Target/X86/X86AsmPrinter.cpp @@ -31,6 +31,7 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -78,6 +78,19 @@ static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, const MCSubtargetInfo &STI); +/// emitXRaySyntheticRef - Appropriately size the synthetic reference to use +/// instructions and nop paddings before the actual relocation/symbol. +void X86AsmPrinter::emitXRaySyntheticRef(MCSymbol* Symbol) { + auto WordSizeBytes = MAI->getCodePointerSize(); + + // Align to twice the pointer size, then emit int3 and a 7-byte nop ahead of + // the symbol. With 8-byte pointers this fills 16 bytes at 16-byte alignment. 
+ OutStreamer->EmitCodeAlignment(2 * WordSizeBytes); + OutStreamer->EmitInstruction(MCInstBuilder(X86::INT3), getSubtargetInfo()); + EmitNops(*OutStreamer, 7u, getSubtarget().is64Bit(), getSubtargetInfo()); + OutStreamer->EmitSymbolValue(Symbol, WordSizeBytes); +} + void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, const MCSubtargetInfo &STI, MCCodeEmitter *CodeEmitter) { Index: test/CodeGen/X86/xray-attribute-instrumentation.ll =================================================================== --- test/CodeGen/X86/xray-attribute-instrumentation.ll +++ test/CodeGen/X86/xray-attribute-instrumentation.ll @@ -14,7 +14,9 @@ ; CHECK-NEXT: nopw %cs:512(%rax,%rax) } ; CHECK: .p2align 4, 0x90 -; CHECK-NEXT: .quad {{.*}}xray_fn_idx_synth_0 +; CHECK-NEXT: int3 +; CHECK-NEXT: nopl 512(%rax) +; CHECK-NEXT: .quad {{.*}}xray_fn_idx_synth_0{{.*}} ; CHECK-NEXT: .section {{.*}}xray_instr_map ; CHECK-LABEL: Lxray_sleds_start0: ; CHECK: .quad {{.*}}xray_sled_0 @@ -51,7 +53,9 @@ ; CHECK-NEXT: nopw %cs:512(%rax,%rax) } ; CHECK: .p2align 4, 0x90 -; CHECK-NEXT: .quad {{.*}}xray_fn_idx_synth_1 +; CHECK-NEXT: int3 +; CHECK-NEXT: nopl 512(%rax) +; CHECK-NEXT: .quad {{.*}}xray_fn_idx_synth_1{{.*}} ; CHECK-NEXT: .section {{.*}}xray_instr_map ; CHECK-LABEL: Lxray_sleds_start1: ; CHECK: .quad {{.*}}xray_sled_2 Index: test/CodeGen/X86/xray-tail-call-sled.ll =================================================================== --- test/CodeGen/X86/xray-tail-call-sled.ll +++ test/CodeGen/X86/xray-tail-call-sled.ll @@ -14,6 +14,8 @@ ; CHECK-NEXT: nopw %cs:512(%rax,%rax) } ; CHECK: .p2align 4, 0x90 +; CHECK-NEXT: int3 +; CHECK-NEXT: nopl 512(%rax) ; CHECK-NEXT: .quad {{.*}}xray_fn_idx_synth_0{{.*}} ; CHECK-NEXT: .section {{.*}}xray_instr_map ; CHECK-LABEL: Lxray_sleds_start0: @@ -41,6 +43,8 @@ ret i32 %retval } ; CHECK: .p2align 4, 0x90 +; CHECK-NEXT: int3 +; CHECK-NEXT: nopl 512(%rax) ; CHECK-NEXT: .quad {{.*}}xray_fn_idx_synth_1{{.*}} ; CHECK-LABEL: Lxray_sleds_start1: ; CHECK: .quad 
{{.*}}xray_sled_2