Index: include/llvm/CodeGen/AsmPrinter.h =================================================================== --- include/llvm/CodeGen/AsmPrinter.h +++ include/llvm/CodeGen/AsmPrinter.h @@ -173,6 +173,10 @@ bool isCFIMoveForDebugging = false; protected: + /// Extension point for XRay to customize the generation of a synthetic + /// reference past the function body. + virtual void emitXRaySyntheticRef(MCSymbol* Symbol); + explicit AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer); public: Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp =================================================================== --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2771,6 +2771,12 @@ Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries } +void AsmPrinter::emitXRaySyntheticRef(MCSymbol* Symbol) { + auto WordSizeBytes = MAI->getCodePointerSize(); + OutStreamer->EmitCodeAlignment(16); + OutStreamer->EmitSymbolValue(Symbol, WordSizeBytes, false); +} + void AsmPrinter::emitXRayTable() { if (Sleds.empty()) return; @@ -2809,8 +2815,7 @@ // this is happening after the last return instruction. auto WordSizeBytes = MAI->getCodePointerSize(); MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true); - OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false); + emitXRaySyntheticRef(IdxRef); // Now we switch to the instrumentation map section. Because this is done // per-function, we are able to create an index entry that will represent the Index: lib/Target/X86/X86AsmPrinter.h =================================================================== --- lib/Target/X86/X86AsmPrinter.h +++ lib/Target/X86/X86AsmPrinter.h @@ -137,6 +137,10 @@ } bool runOnMachineFunction(MachineFunction &F) override; + +protected: + /// X86-specific implementation of synthetic reference generation. 
+ void emitXRaySyntheticRef(MCSymbol* Symbol) override; }; } // end namespace llvm Index: lib/Target/X86/X86AsmPrinter.cpp =================================================================== --- lib/Target/X86/X86AsmPrinter.cpp +++ lib/Target/X86/X86AsmPrinter.cpp @@ -31,6 +31,7 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -78,6 +78,23 @@ static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, const MCSubtargetInfo &STI); +/// emitXRaySyntheticRef - Emit a valid instruction instead of just garbage into +/// the code/text section. This should keep the decode pipeline happy that the +/// instruction is valid. +void X86AsmPrinter::emitXRaySyntheticRef(MCSymbol* Symbol) { + auto WordSizeBytes = MAI->getCodePointerSize(); + OutStreamer->EmitCodeAlignment(2 * WordSizeBytes); + + // We know that the movabsq with a 64-bit immediate operand takes 10 bytes. + // To fill out the full 16 bytes, we need to write out 6 bytes of nops. 
+ OutStreamer->EmitInstruction( + MCInstBuilder(X86::MOV64ri) + .addReg(X86::R10) + .addExpr(MCSymbolRefExpr::create(Symbol, OutContext)), + getSubtargetInfo()); + EmitNops(*OutStreamer, 6u, getSubtarget().is64Bit(), getSubtargetInfo()); +} + void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, const MCSubtargetInfo &STI, MCCodeEmitter *CodeEmitter) { Index: test/CodeGen/X86/xray-attribute-instrumentation.ll =================================================================== --- test/CodeGen/X86/xray-attribute-instrumentation.ll +++ test/CodeGen/X86/xray-attribute-instrumentation.ll @@ -14,7 +14,8 @@ ; CHECK-NEXT: nopw %cs:512(%rax,%rax) } ; CHECK: .p2align 4, 0x90 -; CHECK-NEXT: .quad {{.*}}xray_fn_idx_synth_0 +; CHECK-NEXT: movabsq {{.*}}xray_fn_idx_synth_0{{.*}} +; CHECK-NEXT: nopw 8(%rax,%rax) ; CHECK-NEXT: .section {{.*}}xray_instr_map ; CHECK-LABEL: Lxray_sleds_start0: ; CHECK: .quad {{.*}}xray_sled_0 @@ -51,7 +52,8 @@ ; CHECK-NEXT: nopw %cs:512(%rax,%rax) } ; CHECK: .p2align 4, 0x90 -; CHECK-NEXT: .quad {{.*}}xray_fn_idx_synth_1 +; CHECK-NEXT: movabsq {{.*}}xray_fn_idx_synth_1{{.*}} +; CHECK-NEXT: nopw 8(%rax,%rax) ; CHECK-NEXT: .section {{.*}}xray_instr_map ; CHECK-LABEL: Lxray_sleds_start1: ; CHECK: .quad {{.*}}xray_sled_2 Index: test/CodeGen/X86/xray-tail-call-sled.ll =================================================================== --- test/CodeGen/X86/xray-tail-call-sled.ll +++ test/CodeGen/X86/xray-tail-call-sled.ll @@ -14,7 +14,8 @@ ; CHECK-NEXT: nopw %cs:512(%rax,%rax) } ; CHECK: .p2align 4, 0x90 -; CHECK-NEXT: .quad {{.*}}xray_fn_idx_synth_0{{.*}} +; CHECK-NEXT: movabsq {{.*}}xray_fn_idx_synth_0{{.*}} +; CHECK-NEXT: nopw 8(%rax,%rax) ; CHECK-NEXT: .section {{.*}}xray_instr_map ; CHECK-LABEL: Lxray_sleds_start0: ; CHECK: .quad {{.*}}xray_sled_0 @@ -41,7 +42,8 @@ ret i32 %retval } ; CHECK: .p2align 4, 0x90 -; CHECK-NEXT: .quad {{.*}}xray_fn_idx_synth_1{{.*}} +; CHECK-NEXT: movabsq {{.*}}xray_fn_idx_synth_1{{.*}} +; CHECK-NEXT: nopw 
8(%rax,%rax) ; CHECK-LABEL: Lxray_sleds_start1: ; CHECK: .quad {{.*}}xray_sled_2 ; CHECK: .quad {{.*}}xray_sled_3