diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp --- a/compiler-rt/lib/xray/xray_interface.cpp +++ b/compiler-rt/lib/xray/xray_interface.cpp @@ -264,14 +264,14 @@ // now we're assuming we can mprotect the whole section of text between the // minimum sled address and the maximum sled address (+ the largest sled // size). - auto MinSled = InstrMap.Sleds[0]; - auto MaxSled = InstrMap.Sleds[InstrMap.Entries - 1]; + auto *MinSled = &InstrMap.Sleds[0]; + auto *MaxSled = &InstrMap.Sleds[InstrMap.Entries - 1]; for (std::size_t I = 0; I < InstrMap.Entries; I++) { const auto &Sled = InstrMap.Sleds[I]; - if (Sled.Address < MinSled.Address) - MinSled = Sled; - if (Sled.Address > MaxSled.Address) - MaxSled = Sled; + if (Sled.address() < MinSled->address()) + MinSled = &Sled; + if (Sled.address() > MaxSled->address()) + MaxSled = &Sled; } const size_t PageSize = flags()->xray_page_size_override > 0 @@ -283,9 +283,10 @@ } void *PageAlignedAddr = - reinterpret_cast(MinSled.Address & ~(PageSize - 1)); + reinterpret_cast(MinSled->address() & ~(PageSize - 1)); size_t MProtectLen = - (MaxSled.Address - reinterpret_cast(PageAlignedAddr)) + cSledLength; + (MaxSled->address() - reinterpret_cast(PageAlignedAddr)) + + cSledLength; MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize); if (Protector.MakeWriteable() == -1) { Report("Failed mprotect: %d\n", errno); @@ -337,20 +338,21 @@ auto SledRange = InstrMap.SledsIndex[FuncId - 1]; auto *f = SledRange.Begin; auto *e = SledRange.End; - auto MinSled = *f; - auto MaxSled = *(SledRange.End - 1); + auto *MinSled = f; + auto *MaxSled = (SledRange.End - 1); while (f != e) { - if (f->Address < MinSled.Address) - MinSled = *f; - if (f->Address > MaxSled.Address) - MaxSled = *f; + if (f->address() < MinSled->address()) + MinSled = f; + if (f->address() > MaxSled->address()) + MaxSled = f; ++f; } void *PageAlignedAddr = - reinterpret_cast(MinSled.Address & ~(PageSize - 1)); + reinterpret_cast(MinSled->address() & ~(PageSize - 1)); size_t MProtectLen = - (MaxSled.Address - reinterpret_cast(PageAlignedAddr)) + cSledLength; + (MaxSled->address() - reinterpret_cast(PageAlignedAddr)) + + cSledLength; MProtectHelper Protector(PageAlignedAddr, MProtectLen, PageSize); if (Protector.MakeWriteable() == -1) { Report("Failed mprotect: %d\n", errno); diff --git a/compiler-rt/lib/xray/xray_interface_internal.h b/compiler-rt/lib/xray/xray_interface_internal.h --- a/compiler-rt/lib/xray/xray_interface_internal.h +++ b/compiler-rt/lib/xray/xray_interface_internal.h @@ -29,6 +29,18 @@ unsigned char AlwaysInstrument; unsigned char Version; unsigned char Padding[13]; // Need 32 bytes + uint64_t address() const { +#ifndef __x86_64__ + // R_MIPS_PC64 does not exist. Use absolute address even for version 2. + return Address; +#else + // TODO Eventually all targets but MIPS64 should take this branch. + if (Version < 2) + return Address; + // The target address is relative to the location of the Address variable. + return reinterpret_cast(&Address) + Address; +#endif + } #elif SANITIZER_WORDSIZE == 32 uint32_t Address; uint32_t Function; @@ -36,6 +48,7 @@ unsigned char AlwaysInstrument; unsigned char Version; unsigned char Padding[5]; // Need 16 bytes + uint32_t address() const { return Address; } #else #error "Unsupported word size." #endif diff --git a/compiler-rt/lib/xray/xray_x86_64.cpp b/compiler-rt/lib/xray/xray_x86_64.cpp --- a/compiler-rt/lib/xray/xray_x86_64.cpp +++ b/compiler-rt/lib/xray/xray_x86_64.cpp @@ -151,7 +151,7 @@ // opcode and first operand. // // Prerequisite is to compute the relative offset to the trampoline's address. - const uint64_t Address = Sled.Address; + const uint64_t Address = Sled.address(); int64_t TrampolineOffset = reinterpret_cast(Trampoline) - (static_cast(Address) + 11); if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { @@ -197,7 +197,7 @@ // // Prerequisite is to compute the relative offset fo the // __xray_FunctionExit function's address. - const uint64_t Address = Sled.Address; + const uint64_t Address = Sled.address(); int64_t TrampolineOffset = reinterpret_cast(__xray_FunctionExit) - (static_cast(Address) + 11); if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { @@ -225,7 +225,7 @@ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { // Here we do the dance of replacing the tail call sled with a similar // sequence as the entry sled, but calls the tail exit sled instead. - const uint64_t Address = Sled.Address; + const uint64_t Address = Sled.address(); int64_t TrampolineOffset = reinterpret_cast(__xray_FunctionTailExit) - (static_cast(Address) + 11); @@ -270,12 +270,12 @@ // // --- // - // In Version 1: + // In Version 1 or 2: // // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back // to a jmp, use 15 bytes instead. // - const uint64_t Address = Sled.Address; + const uint64_t Address = Sled.address(); if (Enable) { std::atomic_store_explicit( reinterpret_cast *>(Address), NopwSeq, @@ -283,6 +283,7 @@ } else { switch (Sled.Version) { case 1: + case 2: std::atomic_store_explicit( reinterpret_cast *>(Address), Jmp15Seq, std::memory_order_release); @@ -317,7 +318,7 @@ // unstashes the registers and returns. If the arguments are already in // the correct registers, the stashing and unstashing become equivalently // sized nops. - const uint64_t Address = Sled.Address; + const uint64_t Address = Sled.address(); if (Enable) { std::atomic_store_explicit( reinterpret_cast *>(Address), NopwSeq, diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -286,7 +286,7 @@ const class Function *Fn; uint8_t Version; - void emit(int, MCStreamer *, const MCSymbol *) const; + void emit(int, MCStreamer *, const MCExpr *, const MCSymbol *) const; }; // All the sleds to be emitted. diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -3188,8 +3188,12 @@ // describes each instrumentation point. When XRay patches your code, the index // into this table will be given to your handler as a patch point identifier. void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out, + const MCExpr *Location, const MCSymbol *CurrentFnSym) const { - Out->emitSymbolValue(Sled, Bytes); + if (Location) + Out->emitValueImpl(Location, Bytes); + else + Out->emitSymbolValue(Sled, Bytes); Out->emitSymbolValue(CurrentFnSym, Bytes); auto Kind8 = static_cast(Kind); Out->emitBinaryData(StringRef(reinterpret_cast(&Kind8), 1)); @@ -3209,9 +3213,13 @@ const Function &F = MF->getFunction(); MCSection *InstMap = nullptr; MCSection *FnSledIndex = nullptr; - if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { + const Triple &TT = TM.getTargetTriple(); + bool PCRel = TT.isX86(); + if (TT.isOSBinFormatELF()) { auto LinkedToSym = cast(CurrentFnSym); - auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER; + auto Flags = ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER; + if (!PCRel) + Flags |= ELF::SHF_WRITE; StringRef GroupName; if (F.hasComdat()) { Flags |= ELF::SHF_GROUP; @@ -3240,8 +3248,17 @@ MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true); OutStreamer->SwitchSection(InstMap); OutStreamer->emitLabel(SledsStart); - for (const auto &Sled : Sleds) - Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); + for (const auto &Sled : Sleds) { + const MCExpr *Location = nullptr; + if (PCRel) { + MCSymbol *Dot = OutContext.createTempSymbol(); + OutStreamer->emitLabel(Dot); + Location = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(Sled.Sled, OutContext), + MCSymbolRefExpr::create(Dot, OutContext), OutContext); + } + Sled.emit(WordSizeBytes, OutStreamer.get(), Location, CurrentFnSym); + } MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true); OutStreamer->emitLabel(SledsEnd); diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1511,7 +1511,7 @@ // Record the sled version. Older versions of this sled were spelled // differently, so we let the runtime handle the different offsets we're // using. - recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1); + recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2); } void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, @@ -1612,7 +1612,7 @@ OutStreamer->AddComment("xray typed event end."); // Record the sled version. - recordSled(CurSled, MI, SledKind::TYPED_EVENT, 0); + recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2); } void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, @@ -1652,7 +1652,7 @@ // FIXME: Find another less hacky way do force the relative jump. OutStreamer->emitBytes("\xeb\x09"); EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo()); - recordSled(CurSled, MI, SledKind::FUNCTION_ENTER); + recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2); } void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, @@ -1684,7 +1684,7 @@ Ret.addOperand(MaybeOperand.getValue()); OutStreamer->emitInstruction(Ret, getSubtargetInfo()); EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo()); - recordSled(CurSled, MI, SledKind::FUNCTION_EXIT); + recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2); } void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, @@ -1708,7 +1708,7 @@ OutStreamer->emitBytes("\xeb\x09"); EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo()); OutStreamer->emitLabel(Target); - recordSled(CurSled, MI, SledKind::TAIL_CALL); + recordSled(CurSled, MI, SledKind::TAIL_CALL, 2); unsigned OpCode = MI.getOperand(0).getImm(); OpCode = convertTailJumpOpcode(OpCode); diff --git a/llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll b/llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll --- a/llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll +++ b/llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll @@ -49,9 +49,12 @@ } ; CHECK-LABEL: xray_instr_map ; CHECK-LABEL: Lxray_sleds_start1: -; CHECK: .quad {{.*}}xray_sled_2 -; CHECK: .quad {{.*}}xray_sled_3 -; CHECK: .quad {{.*}}xray_sled_4 +; CHECK: Ltmp2: +; CHECK-NEXT: .quad {{.*}}xray_sled_2-{{\.?}}Ltmp2 +; CHECK: Ltmp3: +; CHECK-NEXT: .quad {{.*}}xray_sled_3-{{\.?}}Ltmp3 +; CHECK: Ltmp4: +; CHECK-NEXT: .quad {{.*}}xray_sled_4-{{\.?}}Ltmp4 ; CHECK-LABEL: Lxray_sleds_end1: ; CHECK-LABEL: xray_fn_idx ; CHECK: .quad {{.*}}xray_sleds_start1 diff --git a/llvm/test/CodeGen/X86/xray-log-args.ll b/llvm/test/CodeGen/X86/xray-log-args.ll --- a/llvm/test/CodeGen/X86/xray-log-args.ll +++ b/llvm/test/CodeGen/X86/xray-log-args.ll @@ -7,17 +7,19 @@ ret i32 %arg } ; CHECK-LABEL: Lxray_sleds_start0: -; CHECK: .quad {{\.?}}Lxray_sled_0 -; CHECK: .quad {{_?}}callee -; CHECK: .byte 0x03 -; CHECK: .byte 0x01 -; CHECK: .byte 0x00 -; CHECK: .{{(zero|space)}} 13 -; CHECK: .quad {{\.?}}Lxray_sled_1 -; CHECK: .quad {{_?}}callee -; CHECK: .byte 0x01 -; CHECK: .byte 0x01 -; CHECK: .byte 0x00 +; CHECK-NEXT: Ltmp0: +; CHECK-NEXT: .quad {{\.?}}Lxray_sled_0-{{\.?}}Ltmp0 +; CHECK-NEXT: .quad {{_?}}callee +; CHECK-NEXT: .byte 0x03 +; CHECK-NEXT: .byte 0x01 +; CHECK-NEXT: .byte 0x02 +; CHECK: .{{(zero|space)}} 13 +; CHECK: Ltmp1: +; CHECK-NEXT: .quad {{\.?}}Lxray_sled_1-{{\.?}}Ltmp1 +; CHECK-NEXT: .quad {{_?}}callee +; CHECK-NEXT: .byte 0x01 +; CHECK-NEXT: .byte 0x01 +; CHECK-NEXT: .byte 0x02 ; CHECK: .{{(zero|space)}} 13 define i32 @caller(i32 %arg) nounwind noinline uwtable "function-instrument"="xray-always" "xray-log-args"="1" { @@ -25,15 +27,17 @@ ret i32 %retval } ; CHECK-LABEL: Lxray_sleds_start1: -; CHECK: .quad {{\.?}}Lxray_sled_2 -; CHECK: .quad {{_?}}caller -; CHECK: .byte 0x03 -; CHECK: .byte 0x01 -; CHECK: .byte 0x00 +; CHECK-NEXT: Ltmp3: +; CHECK-NEXT: .quad {{\.?}}Lxray_sled_2-{{\.?}}Ltmp3 +; CHECK-NEXT: .quad {{_?}}caller +; CHECK-NEXT: .byte 0x03 +; CHECK-NEXT: .byte 0x01 +; CHECK-NEXT: .byte 0x02 ; CHECK: .{{(zero|space)}} 13 -; CHECK: .quad {{\.?}}Lxray_sled_3 -; CHECK: .quad {{_?}}caller -; CHECK: .byte 0x02 -; CHECK: .byte 0x01 -; CHECK: .byte 0x00 +; CHECK: Ltmp4: +; CHECK-NEXT: .quad {{\.?}}Lxray_sled_3-{{\.?}}Ltmp4 +; CHECK-NEXT: .quad {{_?}}caller +; CHECK-NEXT: .byte 0x02 +; CHECK-NEXT: .byte 0x01 +; CHECK-NEXT: .byte 0x02 ; CHECK: .{{(zero|space)}} 13 diff --git a/llvm/test/CodeGen/X86/xray-section-group.ll b/llvm/test/CodeGen/X86/xray-section-group.ll --- a/llvm/test/CodeGen/X86/xray-section-group.ll +++ b/llvm/test/CodeGen/X86/xray-section-group.ll @@ -5,14 +5,14 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK: .section .text.foo,"ax",@progbits ret i32 0 -; CHECK: .section xray_instr_map,"awo",@progbits,foo{{$}} +; CHECK: .section xray_instr_map,"ao",@progbits,foo{{$}} } $bar = comdat any define i32 @bar() nounwind noinline uwtable "function-instrument"="xray-always" comdat($bar) { ; CHECK: .section .text.bar,"axG",@progbits,bar,comdat ret i32 1 -; CHECK: .section xray_instr_map,"aGwo",@progbits,bar,comdat,bar{{$}} +; CHECK: .section xray_instr_map,"aGo",@progbits,bar,comdat,bar{{$}} } ; CHECK-OBJ: section xray_instr_map: