Index: llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
===================================================================
--- llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -179,6 +179,12 @@
   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) override;
+  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
+                                   MCCodeEmitter &Emitter,
+                                   unsigned &RemainingSize);
+  bool padInstructionViaPrefix(MCRelaxableFragment &RF,
+                               MCCodeEmitter &Emitter,
+                               unsigned &RemainingSize);
   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
 };
@@ -646,9 +652,113 @@
   return getRelaxedOpcode(Inst, is16BitMode) != Inst.getOpcode();
 }
 
-bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
-                                           MCCodeEmitter &Emitter,
-                                           unsigned &RemainingSize) {
+static bool shouldAddPrefix(const MCInst &Inst, const MCInstrInfo &MCII) {
+  // No prefix can be added if AlignMaxPrefixSize is 0 (the limit is
+  // currently hard-coded to 5 in padInstructionViaPrefix).
+  //if (AlignMaxPrefixSize == 0)
+  //  return false;
+
+#if 0
+  // TODO: Ensure that the expanded branch does not cross the alignment
+  // boundary of any preceding boundary align. The easiest way is to check
+  // the starting offset and the resulting size. The longer the instruction,
+  // the easier it is to cross a boundary; prefixes should not be inserted
+  // before any branch affected by the JCC erratum, even if it is asked to
+  // be aligned.
+  const MCInstrDesc &InstDesc = MCII.get(Inst.getOpcode());
+  if (InstDesc.isBranch() || InstDesc.isCall() || InstDesc.isReturn())
+    return false;
+#endif
+
+  // The linker may rewrite an instruction that has a variant symbol operand.
+  return !hasVariantSymbol(Inst);
+}
+
+/// Choose which prefix byte should be inserted before the instruction.
+///
+/// If the instruction already has a segment override prefix, reuse it.
+/// Otherwise, if the target is 64-bit, use CS.
+/// If the target is 32-bit:
+///   - If the instruction has an ESP/EBP base register, use SS.
+///   - Otherwise, use DS.
+/// In each case the chosen override is a no-op (it either names the segment
+/// the instruction would use anyway or is ignored in 64-bit mode), so the
+/// prefix only changes the encoding length, not the behavior.
+static uint8_t choosePrefix(const MCInst &Inst, const MCInstrInfo &MCII,
+                            const MCSubtargetInfo &STI) {
+  assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) &&
+         "Prefixes can be added only in 32-bit or 64-bit mode.");
+  unsigned Opcode = Inst.getOpcode();
+  const MCInstrDesc &Desc = MCII.get(Opcode);
+  uint64_t TSFlags = Desc.TSFlags;
+
+  unsigned CurOp = X86II::getOperandBias(Desc);
+
+  // Determine where the memory operand starts, if present.
+  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+  if (MemoryOperand != -1)
+    MemoryOperand += CurOp;
+
+  unsigned SegmentReg = 0;
+  if (MemoryOperand >= 0) {
+    // Check for an explicit segment override on the memory operand.
+    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
+  }
+
+  uint64_t Form = TSFlags & X86II::FormMask;
+  switch (Form) {
+  default:
+    break;
+  case X86II::RawFrmDstSrc: {
+    // Check segment override opcode prefix as needed (not for %ds).
+    if (Inst.getOperand(2).getReg() != X86::DS)
+      SegmentReg = Inst.getOperand(2).getReg();
+    break;
+  }
+  case X86II::RawFrmSrc: {
+    // Check segment override opcode prefix as needed (not for %ds).
+    if (Inst.getOperand(1).getReg() != X86::DS)
+      SegmentReg = Inst.getOperand(1).getReg();
+    break;
+  }
+  case X86II::RawFrmMemOffs: {
+    // Check segment override opcode prefix as needed.
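+    // (RawFrmMemOffs carries its segment register in operand 1.)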
+    SegmentReg = Inst.getOperand(1).getReg();
+    break;
+  }
+  }
+
+  switch (SegmentReg) {
+  case 0:
+    break;
+  case X86::CS:
+    return 0x2e;
+  case X86::SS:
+    return 0x36;
+  case X86::DS:
+    return 0x3e;
+  case X86::ES:
+    return 0x26;
+  case X86::FS:
+    return 0x64;
+  case X86::GS:
+    return 0x65;
+  }
+
+  if (STI.hasFeature(X86::Mode64Bit))
+    return 0x2e;
+
+  if (MemoryOperand >= 0) {
+    // MemoryOperand has already been biased by CurOp above.
+    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
+    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
+    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
+      return 0x36;
+  }
+  return 0x3e;
+}
+
+bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
+                                                MCCodeEmitter &Emitter,
+                                                unsigned &RemainingSize) {
   if (!canBeRelaxedForPadding(RF))
     return false;
 
@@ -672,6 +782,61 @@
   RemainingSize -= Delta;
   return true;
 }
+
+bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
+                                            MCCodeEmitter &Emitter,
+                                            unsigned &RemainingSize) {
+  if (RemainingSize == 0)
+    return false;
+  if (!shouldAddPrefix(RF.getInst(), *MCII))
+    return false;
+  // x86 instructions are at most 15 bytes long; a full fragment can't grow.
+  const unsigned OldSize = RF.getContents().size();
+  if (OldSize == 15)
+    return false;
+
+  // The number of prefixes is limited by AlignMaxPrefixSize for performance
+  // reasons, so compute how many prefixes can still be added.
+  auto GetRemainingPrefixSize = [&](const MCInst &Inst) {
+    SmallString<256> Code;
+    raw_svector_ostream VecOS(Code);
+    Emitter.emitPrefix(Inst, VecOS, STI);
+    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
+    uint8_t ExistingPrefixSize = static_cast<uint8_t>(Code.size());
+    const uint8_t AlignMaxPrefixSize = 5;
+    if (AlignMaxPrefixSize > ExistingPrefixSize)
+      return AlignMaxPrefixSize - ExistingPrefixSize;
+    return 0;
+  };
+  const size_t PrefixBytesToAdd =
+      std::min(std::min((size_t)15 - OldSize, (size_t)RemainingSize),
+               (size_t)GetRemainingPrefixSize(RF.getInst()));
+  if (PrefixBytesToAdd == 0)
+    return false;
+
+  const uint8_t Prefix = choosePrefix(RF.getInst(), *MCII, STI);
+  SmallString<256> Code;
+  Code.append(PrefixBytesToAdd, Prefix);
+  Code.append(RF.getContents().begin(), RF.getContents().end());
+  RF.getContents() = Code;
+  RemainingSize -= PrefixBytesToAdd;
+
+  // Adjust the fixups for the change in offsets.
+  for (auto &F : RF.getFixups())
+    F.setOffset(F.getOffset() + PrefixBytesToAdd);
+
+  return true;
+}
+
+bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
+                                           MCCodeEmitter &Emitter,
+                                           unsigned &RemainingSize) {
+  // Relax the instruction first if possible, then pad with redundant
+  // prefixes to consume any remaining bytes.
+  bool Changed = false;
+  Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
+  Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
+  return Changed;
+}
+
 /// Write a sequence of optimal nops to the output, covering \p Count
 /// bytes.
Index: llvm/test/MC/X86/align-via-relaxation.s
===================================================================
--- llvm/test/MC/X86/align-via-relaxation.s
+++ llvm/test/MC/X86/align-via-relaxation.s
@@ -7,13 +7,13 @@
 
   # Demonstrate that we can relax instructions to provide padding, not
   # just insert nops. jmps are being used for ease of demonstration.
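+  # With prefix padding available, a short jmp can keep its two-byte
+  # encoding and grow via redundant CS-override prefixes (2e) instead.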
   # CHECK: .text
-  # CHECK: 0: eb 1f                          jmp 31
-  # CHECK: 2: e9 1a 00 00 00                 jmp 26
-  # CHECK: 7: e9 15 00 00 00                 jmp 21
-  # CHECK: c: e9 10 00 00 00                 jmp 16
-  # CHECK: 11: e9 0b 00 00 00                jmp 11
-  # CHECK: 16: e9 06 00 00 00                jmp 6
-  # CHECK: 1b: e9 01 00 00 00                jmp 1
+  # CHECK: 0: eb 1f                          jmp 31
+  # CHECK: 2: eb 1d                          jmp 29
+  # CHECK: 4: eb 1b                          jmp 27
+  # CHECK: 6: eb 19                          jmp 25
+  # CHECK: 8: 2e 2e eb 15                    jmp 21
+  # CHECK: c: 2e 2e 2e 2e 2e e9 0b 00 00 00  jmp 11
+  # CHECK: 16: 2e 2e 2e 2e 2e e9 01 00 00 00 jmp 1
   # CHECK: 20: cc                            int3
   .p2align 4
   jmp foo
@@ -47,15 +47,14 @@
   # fewer nops by relaxing the branch, even though we don't need to
   # CHECK: loop_preheader:
   # CHECK: 45: 48 85 c0                      testq %rax, %rax
-  # CHECK: 48: 0f 8e 22 00 00 00             jle 34
-  # CHECK: 4e: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
-  # CHECK: 58: 0f 1f 84 00 00 00 00 00       nopl (%rax,%rax)
+  # CHECK: 48: 2e 2e 2e 2e 0f 8e 1e 00 00 00 jle 30
+  # CHECK: 52: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:(%rax,%rax)
+  # CHECK: 5c: 0f 1f 40 00                   nopl (%rax)
   # CHECK: loop_header:
   # CHECK: 60: 48 83 e8 01                   subq $1, %rax
   # CHECK: 64: 48 85 c0                      testq %rax, %rax
   # CHECK: 67: 7e 07                         jle 7
-  # CHECK: 69: e9 f2 ff ff ff                jmp -14
-  # CHECK: 6e: 66 90                         nop
+  # CHECK: 69: 2e 2e e9 f0 ff ff ff          jmp -16
   # CHECK: loop_exit:
   # CHECK: 70: c3                            retq
   .p2align 5
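
For reference, a minimal standalone sketch of the idea the patch relies on;
padWithPrefixes is a hypothetical name, not an LLVM API. A segment-override
prefix that names the segment an instruction already uses (or that is ignored
in 64-bit mode) changes only the encoding length, never the behavior, which is
what padInstructionViaPrefix exploits:

  #include <cstdint>
  #include <vector>

  // Prepend Count copies of a segment-override prefix byte to an encoded
  // instruction. The padded bytes decode to the same operation.
  static std::vector<uint8_t> padWithPrefixes(const std::vector<uint8_t> &Enc,
                                              uint8_t Prefix, unsigned Count) {
    std::vector<uint8_t> Padded(Count, Prefix);
    Padded.insert(Padded.end(), Enc.begin(), Enc.end());
    return Padded;
  }

  int main() {
    // Short "jmp +21" is eb 15; two CS overrides (2e) give 2e 2e eb 15,
    // matching the "8: 2e 2e eb 15" line in the updated test above.
    std::vector<uint8_t> Jmp = {0xeb, 0x15};
    return padWithPrefixes(Jmp, 0x2e, 2).size() == 4 ? 0 : 1;
  }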