Index: llvm/include/llvm/MC/MCAsmBackend.h
===================================================================
--- llvm/include/llvm/MC/MCAsmBackend.h
+++ llvm/include/llvm/MC/MCAsmBackend.h
@@ -46,6 +46,10 @@
 
   const support::endianness Endian;
 
+  /// Return true if this target might automatically pad instructions and thus
+  /// need to emit padding enable/disable directives around sensitive code.
+  virtual bool allowAutoPadding() const { return false; }
+
   /// Give the target a chance to manipulate state related to instruction
   /// alignment (e.g. padding for optimization) before and after actually
   /// emitting the instruction.
Index: llvm/include/llvm/MC/MCStreamer.h
===================================================================
--- llvm/include/llvm/MC/MCStreamer.h
+++ llvm/include/llvm/MC/MCStreamer.h
@@ -222,6 +222,13 @@
 
   bool UseAssemblerInfoForParsing;
 
+  /// Is the assembler allowed to insert padding automatically? For
+  /// correctness reasons, we sometimes need to ensure instructions aren't
+  /// separated in unexpected ways. At the moment, this feature is only
+  /// usable from an integrated assembler, but assembly syntax is under
+  /// discussion for future inclusion.
+  bool AllowAutoPadding = false;
+
 protected:
   MCStreamer(MCContext &Ctx);
 
@@ -266,6 +273,9 @@
     return TargetStreamer.get();
   }
 
+  void setAllowAutoPadding(bool v) { AllowAutoPadding = v; }
+  bool getAllowAutoPadding() const { return AllowAutoPadding; }
+
   /// When emitting an object file, create and emit a real label. When emitting
   /// textual assembly, this should do nothing to avoid polluting our output.
   virtual MCSymbol *EmitCFILabel();
Index: llvm/lib/MC/MCAsmStreamer.cpp
===================================================================
--- llvm/lib/MC/MCAsmStreamer.cpp
+++ llvm/lib/MC/MCAsmStreamer.cpp
@@ -77,6 +77,7 @@
     assert(InstPrinter);
     if (IsVerboseAsm)
         InstPrinter->setCommentStream(CommentStream);
+    setAllowAutoPadding(Assembler->getBackend().allowAutoPadding());
   }
 
   MCAssembler &getAssembler() { return *Assembler; }
Index: llvm/lib/MC/MCObjectStreamer.cpp
===================================================================
--- llvm/lib/MC/MCObjectStreamer.cpp
+++ llvm/lib/MC/MCObjectStreamer.cpp
@@ -29,7 +29,9 @@
     : MCStreamer(Context),
       Assembler(std::make_unique<MCAssembler>(
           Context, std::move(TAB), std::move(Emitter), std::move(OW))),
-      EmitEHFrame(true), EmitDebugFrame(false) {}
+      EmitEHFrame(true), EmitDebugFrame(false) {
+  setAllowAutoPadding(Assembler->getBackend().allowAutoPadding());
+}
 
 MCObjectStreamer::~MCObjectStreamer() {}
Index: llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
===================================================================
--- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -155,6 +155,7 @@
     AlignBranchType = X86AlignBranchKindLoc;
   }
 
+  bool allowAutoPadding() const override;
   void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
   void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
 
@@ -410,10 +411,15 @@
   return false;
 }
 
+bool X86AsmBackend::allowAutoPadding() const {
+  return (AlignBoundary != Align::None() &&
+          AlignBranchType != X86::AlignBranchNone);
+}
+
 bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const {
-  if (AlignBoundary == Align::None() ||
-      AlignBranchType == X86::AlignBranchNone)
+  if (!OS.getAllowAutoPadding())
     return false;
+  assert(allowAutoPadding() && "incorrect initialization!");
 
   MCAssembler &Assembler = OS.getAssembler();
   MCSection *Sec = OS.getCurrentSectionOnly();
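
// A minimal sketch (not part of the patch) of how the new hook is intended to
// flow: a streamer seeds its AllowAutoPadding flag from
// MCAsmBackend::allowAutoPadding() at construction, and a client that must
// keep an instruction at a stable offset can clear the flag around the
// emission. Only the MCAsmBackend/MCStreamer methods introduced above are
// real; emitWithoutAutoPadding is a hypothetical helper for illustration.

#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"

static void emitWithoutAutoPadding(llvm::MCStreamer &OS,
                                   const llvm::MCInst &Inst,
                                   const llvm::MCSubtargetInfo &STI) {
  const bool Old = OS.getAllowAutoPadding(); // seeded from the backend
  OS.setAllowAutoPadding(false);             // correctness-sensitive region
  OS.EmitInstruction(Inst, STI);             // must not gain padding around it
  OS.setAllowAutoPadding(Old);               // restore the previous policy
}
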
Index: llvm/lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- llvm/lib/Target/X86/X86MCInstLower.cpp
+++ llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1142,10 +1142,37 @@
   }
 }
 
+/// A RAII helper which defines a region of instructions which can't have
+/// padding added between them for correctness.
+struct NoAutoPaddingScope {
+  MCStreamer &OS;
+  bool OldAllowAutoPadding;
+  NoAutoPaddingScope(MCStreamer &OS) : OS(OS) {
+    OldAllowAutoPadding = OS.getAllowAutoPadding();
+    changeAndComment(false);
+  }
+  ~NoAutoPaddingScope() {
+    changeAndComment(OldAllowAutoPadding);
+  }
+
+  void changeAndComment(bool b) {
+    if (b == OS.getAllowAutoPadding())
+      return;
+    OS.setAllowAutoPadding(b);
+    if (b)
+      OS.emitRawComment("autopadding");
+    else
+      OS.emitRawComment("noautopadding");
+  }
+};
+
 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                     X86MCInstLower &MCIL) {
   assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   StatepointOpers SOpers(&MI);
   if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
     EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
@@ -1207,6 +1234,8 @@
   // FAULTING_LOAD_OP <def>, <fault kind>, <MBB handler>,
   //                  <opcode>, <operands>
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   Register DefRegister = FaultingMI.getOperand(0).getReg();
   FaultMaps::FaultKind FK =
       static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
@@ -1253,6 +1282,8 @@
 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                       X86MCInstLower &MCIL) {
   // PATCHABLE_OP minsize, opcode, operands
+
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
 
   unsigned MinSize = MI.getOperand(0).getImm();
   unsigned Opcode = MI.getOperand(1).getImm();
@@ -1291,7 +1322,7 @@
 // <id>, <shadowBytes>, ...
 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
-
+
   auto &Ctx = OutStreamer->getContext();
   MCSymbol *MILabel = Ctx.createTempSymbol();
   OutStreamer->EmitLabel(MILabel);
@@ -1309,6 +1340,8 @@
 
   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   auto &Ctx = OutStreamer->getContext();
   MCSymbol *MILabel = Ctx.createTempSymbol();
   OutStreamer->EmitLabel(MILabel);
@@ -1368,6 +1401,8 @@
                                           X86MCInstLower &MCIL) {
   assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // We want to emit the following pattern, which follows the x86 calling
   // convention to prepare for the trampoline call to be patched in.
   //
@@ -1462,6 +1497,8 @@
                                           X86MCInstLower &MCIL) {
   assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
 
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // We want to emit the following pattern, which follows the x86 calling
   // convention to prepare for the trampoline call to be patched in.
   //
@@ -1559,6 +1596,9 @@
 
 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                   X86MCInstLower &MCIL) {
+
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // We want to emit the following pattern:
   //
   //   .p2align 1, ...
@@ -1586,6 +1626,8 @@
 
 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
                                        X86MCInstLower &MCIL) {
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // Since PATCHABLE_RET takes the opcode of the return statement as an
   // argument, we use that to emit the correct form of the RET that we want.
   // i.e. when we see this:
@@ -1616,6 +1658,8 @@
 
 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
+  NoAutoPaddingScope NoPadScope(*OutStreamer);
+
   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
   // instruction so we lower that particular instruction and its operands.
   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
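
// A minimal sketch (not part of the patch) of the usage pattern the
// NoAutoPaddingScope RAII helper is designed for. LowerMY_PSEUDO is a
// hypothetical lowering routine; the scope keeps the temporary label bound to
// the instruction it describes, and in textual assembly the transitions show
// up as the "#noautopadding" / "#autopadding" comments the tests below check.

static void LowerMY_PSEUDO(llvm::MCStreamer &OS, const llvm::MCInst &Inst,
                           const llvm::MCSubtargetInfo &STI) {
  NoAutoPaddingScope NoPadScope(OS); // disable padding; emits "#noautopadding"

  llvm::MCSymbol *MILabel = OS.getContext().createTempSymbol();
  OS.EmitLabel(MILabel);             // label must stay glued to the next inst
  OS.EmitInstruction(Inst, STI);     // emitted with automatic padding disabled
}                                    // destructor restores the caller's policy
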
Index: llvm/test/CodeGen/X86/align-branch-boundary-noautopadding.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/align-branch-boundary-noautopadding.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -O3 -mcpu=skylake -x86-align-branch-boundary=32 -x86-align-branch=call -filetype=obj < %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s
+
+;; This file is a companion to align-branch-boundary-suppressions.ll.
+;; It exists to demonstrate that suppressions are actually wired into the
+;; integrated assembler.
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+define void @test_statepoint(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK: 1f: callq
+entry:
+  ; Each of these will be 5 bytes, pushing the statepoint to offset=30.
+  ; For a normal call, this would force padding between the last normal
+  ; call and the safepoint, but since we've suppressed alignment that won't
+  ; happen for the safepoint. That's non-ideal, we'd really prefer to do
+  ; the alignment and just keep the label with the statepoint call. (TODO)
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call void @foo()
+  call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
+  ret void
+}
+
+declare void @foo()
+declare zeroext i1 @return_i1()
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
Index: llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -O3 -enable-implicit-null-checks -mcpu=skylake -x86-align-branch-boundary=32 -x86-align-branch=call+jmp+indirect+ret+jcc < %s | FileCheck %s
+
+;; The tests in this file check that various constructs which need to disable
+;; prefix and/or nop padding do so in the right places. However, since we
+;; don't yet have assembler syntax for this, they're only able to check
+;; comments and must hope the assembler does the right thing.
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+; If we have autopadding enabled, make sure the label isn't separated from
+; the mov.
+define i32 @implicit_null_check(i32* %x) {
+; CHECK-LABEL: implicit_null_check:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    #noautopadding
+; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:    movl (%rdi), %eax # on-fault: .LBB0_1
+; CHECK-NEXT:    #autopadding
+; CHECK-NEXT:  # %bb.2: # %not_null
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB0_1: # %is_null
+; CHECK-NEXT:    movl $42, %eax
+; CHECK-NEXT:    retq
+
+ entry:
+  %c = icmp eq i32* %x, null
+  br i1 %c, label %is_null, label %not_null, !make.implicit !{}
+
+ is_null:
+  ret i32 42
+
+ not_null:
+  %t = load atomic i32, i32* %x unordered, align 4
+  ret i32 %t
+}
+
+; Label must bind to call before
+define void @test_statepoint(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test_statepoint:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    #noautopadding
+; CHECK-NEXT:    callq return_i1
+; CHECK-NEXT:  .Ltmp1:
+; CHECK-NEXT:    #autopadding
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
+  ret void
+}
+
+declare zeroext i1 @return_i1()
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+
+
+; Label must bind to following nop sequence
+define void @patchpoint(i64 %a, i64 %b) {
+; CHECK-LABEL: patchpoint:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    #noautopadding
+; CHECK-NEXT:  .Ltmp2:
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    .byte 102
+; CHECK-NEXT:    nopw %cs:512(%rax,%rax)
+; CHECK-NEXT:    #autopadding
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+entry:
+  call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4, i32 15, i8* null, i32 0, i64 %a, i64 %b)
+  ret void
+}
+
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)