Index: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp +++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp @@ -1109,7 +1109,19 @@ Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); } + + // Before we switch over, we force a reference to a label inside the + // xray_instr_map section. Since EmitXRayTable() is always called just + // before the function's end, we assume that this is happening after the + // last return instruction. + // + // We then align the reference to 16-byte boundaries, which we determined + // experimentally to be beneficial to avoid causing decoder stalls. + MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); + OutStreamer->EmitCodeAlignment(16); + OutStreamer->EmitSymbolValue(Tmp, 8, false); OutStreamer->SwitchSection(Section); + OutStreamer->EmitLabel(Tmp); for (const auto &Sled : Sleds) { OutStreamer->EmitSymbolValue(Sled.Sled, 8); OutStreamer->EmitSymbolValue(CurrentFnSym, 8); Index: llvm/trunk/test/CodeGen/X86/xray-attribute-instrumentation.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/xray-attribute-instrumentation.ll +++ llvm/trunk/test/CodeGen/X86/xray-attribute-instrumentation.ll @@ -12,3 +12,9 @@ ; CHECK-NEXT: retq ; CHECK-NEXT: nopw %cs:512(%rax,%rax) } +; CHECK: .p2align 4, 0x90 +; CHECK-NEXT: .quad .Lxray_synthetic_0 +; CHECK-NEXT: .section xray_instr_map,{{.*}} +; CHECK-LABEL: Lxray_synthetic_0: +; CHECK: .quad .Lxray_sled_0 +; CHECK: .quad .Lxray_sled_1