Index: ELF/Relocations.h =================================================================== --- ELF/Relocations.h +++ ELF/Relocations.h @@ -126,9 +126,11 @@ private: void mergeThunks(); void createInitialThunkSections(ArrayRef OutputSections); - ThunkSection *getOSThunkSec(OutputSection *OS, InputSection *IS); + ThunkSection *getOSThunkSec(OutputSection *OS, InputSection *IS, + uint32_t Type, uint64_t Src); ThunkSection *getISThunkSec(InputSection *IS, OutputSection *OS); - std::pair getThunk(SymbolBody &Body, uint32_t Type); + std::pair getThunk(SymbolBody &Body, uint32_t Type, + uint64_t SrcAddr); ThunkSection *addThunkSection(OutputSection *OS, uint64_t Off); uint32_t Pass = 0; Index: ELF/Relocations.cpp =================================================================== --- ELF/Relocations.cpp +++ ELF/Relocations.cpp @@ -989,7 +989,7 @@ // to a Target specific branch range. For an OutputSection that is smaller than // the range then a single ThunkSection at the end will do. template -void elf::ThunkCreator::createInitialThunkSections( +void ThunkCreator::createInitialThunkSections( ArrayRef OutputSections) { bool NeedTrailingTS = true; @@ -1017,14 +1017,22 @@ } } +// Find or create a non-inline ThunkSection in this OS to place Thunk template ThunkSection *ThunkCreator::getOSThunkSec(OutputSection *OS, - InputSection *IS) { - // Until range extension thunks just return the first section + InputSection *IS, uint32_t Type, + uint64_t Src) { + // Use the simple heuristic of inserting the Thunk in the first ThunkSection + // in range of the caller. for (ThunkSection *TS : ThunkSections[OS]) { - return TS; + uint64_t TSBase = OS->Addr + TS->OutSecOff; + uint64_t TSLimit = TSBase + TS->getSize(); + if (Target->inBranchRange(Type, Src, (Src > TSLimit) ? TSBase : TSLimit)) + return TS; } - // No suitable ThunkSection exists. + // No suitable ThunkSection exists. 
This can happen when there is a branch + // with lower range than the ThunkSection spacing or when there are too + // many Thunks. Create a new ThunkSection. return addThunkSection(OS, IS->OutSecOff); } @@ -1042,13 +1050,15 @@ template std::pair ThunkCreator::getThunk(SymbolBody &Body, - uint32_t Type) { + uint32_t Type, + uint64_t SrcAddr) { Thunk *T = nullptr; auto Res = ThunkedSymbols.insert({&Body, std::vector()}); if (Res.second == false) { // Check existing Thunks for Body to see if they can be reused for (Thunk *ET : Res.first->second) - if (ET->compatibleWith(Type)) { + if (ET->compatibleWith(Type) && + Target->inBranchRange(Type, SrcAddr, ET->ThunkSym->getVA())) { T = ET; return std::make_pair(T, Res.second); } @@ -1077,16 +1087,13 @@ // each InputSection. It must be called before the static symbol table is // finalized. If any Thunks are added to an OutputSection the output section // offsets of the InputSections will change. -// -// FIXME: All Thunks are assumed to be in range of the relocation. Range -// extension Thunks are not yet supported. template bool ThunkCreator::createThunks( ArrayRef OutputSections) { bool AddressesChanged = false; if (Pass > 0) { NewThunkSections.clear(); - if (Pass == 2) + if (Pass == 10) // With existing Thunks pass 0 will create Thunks, pass 1 will // create no more Thunks so if we get to 2 something has gone wrong. fatal("Thunk creation not converged in sufficient number of passes"); @@ -1107,13 +1114,14 @@ continue; for (Relocation &Rel : IS->Relocations) { SymbolBody &Body = *Rel.Sym; + uint64_t SrcAddr = OS->Addr + IS->OutSecOff + Rel.Offset; // If this Relocation is using a Thunk, use it. 
if (Thunks.find(&Body) != Thunks.end()) continue; - if (Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body)) { + if (Target->needsThunk(Rel.Expr, Rel.Type, IS->File, SrcAddr, Body)) { Thunk *T; bool IsNew; - std::tie(T, IsNew) = getThunk(Body, Rel.Type); + std::tie(T, IsNew) = getThunk(Body, Rel.Type, SrcAddr); if (IsNew) { AddressesChanged = true; // Find or create a ThunkSection for the new Thunk @@ -1121,7 +1129,7 @@ if (auto *TIS = T->getTargetInputSection()) TS = getISThunkSec(TIS, OS); else - TS = getOSThunkSec(OS, IS); + TS = getOSThunkSec(OS, IS, Rel.Type, SrcAddr); TS->addThunk(T); Thunks[T->ThunkSym] = T; } Index: ELF/Target.h =================================================================== --- ELF/Target.h +++ ELF/Target.h @@ -53,7 +53,10 @@ // Decide whether a Thunk is needed for the relocation from File // targeting S. virtual bool needsThunk(RelExpr Expr, uint32_t RelocType, - const InputFile *File, const SymbolBody &S) const; + const InputFile *File, uint64_t BranchAddr, + const SymbolBody &S) const; + virtual bool inBranchRange(uint32_t RelocType, uint64_t Src, + uint64_t Dst) const; virtual RelExpr getRelExpr(uint32_t Type, const SymbolBody &S) const = 0; virtual void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const = 0; virtual ~TargetInfo(); Index: ELF/Target.cpp =================================================================== --- ELF/Target.cpp +++ ELF/Target.cpp @@ -229,7 +229,9 @@ void addPltSymbols(InputSectionBase *IS, uint64_t Off) const override; void addPltHeaderSymbols(InputSectionBase *ISD) const override; bool needsThunk(RelExpr Expr, uint32_t RelocType, const InputFile *File, - const SymbolBody &S) const override; + uint64_t BranchAddr, const SymbolBody &S) const override; + bool inBranchRange(uint32_t RelocType, uint64_t Src, + uint64_t Dst) const override; void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; }; @@ -247,7 +249,7 @@ void writePlt(uint8_t *Buf, uint64_t GotEntryAddr, uint64_t 
PltEntryAddr, int32_t Index, unsigned RelOff) const override; bool needsThunk(RelExpr Expr, uint32_t RelocType, const InputFile *File, - const SymbolBody &S) const override; + uint64_t BranchAddr, const SymbolBody &S) const override; void relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const override; bool usesOnlyLowPageBits(uint32_t Type) const override; }; @@ -298,10 +300,16 @@ bool TargetInfo::usesOnlyLowPageBits(uint32_t Type) const { return false; } bool TargetInfo::needsThunk(RelExpr Expr, uint32_t RelocType, - const InputFile *File, const SymbolBody &S) const { + const InputFile *File, uint64_t BranchAddr, + const SymbolBody &S) const { return false; } +bool TargetInfo::inBranchRange(uint32_t RelocType, uint64_t Src, + uint64_t Dst) const { + return true; +} + bool TargetInfo::isTlsInitialExecRel(uint32_t Type) const { return false; } bool TargetInfo::isTlsLocalDynamicRel(uint32_t Type) const { return false; } @@ -1668,6 +1676,10 @@ // ARM uses Variant 1 TLS TcbSize = 8; NeedsThunks = true; + // Thumb unconditional branch range on system with Thumb2 branch encoding + ThunkSectionSpacing = 0x1000000; + // Allow for 16384 12 byte Thunks per ThunkSectionSpacing + ThunkSectionSize = 0x30000; } RelExpr ARMTargetInfo::getRelExpr(uint32_t Type, const SymbolBody &S) const { @@ -1792,7 +1804,7 @@ } bool ARMTargetInfo::needsThunk(RelExpr Expr, uint32_t RelocType, - const InputFile *File, + const InputFile *File, uint64_t BranchAddr, const SymbolBody &S) const { // If S is an undefined weak symbol in an executable we don't need a Thunk. // In a DSO calls to undefined symbols, including weak ones get PLT entries @@ -1800,6 +1812,7 @@ if (S.isUndefined() && !S.isLocal() && S.symbol()->isWeak() && !Config->Shared) return false; + // A state change from ARM to Thumb and vice versa must go through an // interworking thunk if the relocation type is not R_ARM_CALL or // R_ARM_THM_CALL. 
@@ -1811,18 +1824,71 @@ // Otherwise we need to interwork if Symbol has bit 0 set (Thumb). if (Expr == R_PC && ((S.getVA() & 1) == 1)) return true; + // Fall through + case R_ARM_CALL: { + uint64_t Dst = (Expr == R_PLT_PC) ? S.getPltVA() : S.getVA(); + return !inBranchRange(RelocType, BranchAddr, Dst); break; + } case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: // Source is Thumb, all PLT entries are ARM so interworking is required. // Otherwise we need to interwork if Symbol has bit 0 clear (ARM). if (Expr == R_PLT_PC || ((S.getVA() & 1) == 0)) return true; - break; + // Fall through + case R_ARM_THM_CALL: { + uint64_t Dst = (Expr == R_PLT_PC) ? S.getPltVA() : S.getVA(); + return !inBranchRange(RelocType, BranchAddr, Dst); + } } return false; } +bool ARMTargetInfo::inBranchRange(uint32_t RelocType, uint64_t Src, + uint64_t Dst) const { + uint64_t Range; + uint64_t InstrSize; + switch (RelocType) { + case R_ARM_PC24: + case R_ARM_PLT32: + case R_ARM_JUMP24: + case R_ARM_CALL: + Range = 0x2000000; + InstrSize = 4; + break; + case R_ARM_THM_JUMP19: + Range = 0x100000; + InstrSize = 2; + break; + case R_ARM_THM_JUMP24: + case R_ARM_THM_CALL: + Range = 0x1000000; + InstrSize = 2; + break; + default: + return true; + } + // PC at Src is 2 instructions ahead, immediate of branch is signed + if (Src > Dst) + Range -= 2 * InstrSize; + else + Range += InstrSize; + + if ((Dst & 0x1) == 0) + // Destination is ARM, if ARM caller then Src is already 4-byte aligned. + // If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to ensure + // destination will be 4 byte aligned. + Src &= ~0x3; + else + // Bit 0 == 1 denotes Thumb state, it is not part of the range + Dst &= ~0x1; + + uint64_t Distance = (Src > Dst) ? 
Src - Dst : Dst - Src; + + return Distance <= Range; +} + void ARMTargetInfo::relocateOne(uint8_t *Loc, uint32_t Type, uint64_t Val) const { switch (Type) { @@ -2238,6 +2304,7 @@ template bool MipsTargetInfo::needsThunk(RelExpr Expr, uint32_t Type, const InputFile *File, + uint64_t BranchAddr, const SymbolBody &S) const { // Any MIPS PIC code function is invoked with its address in register $t9. // So if we have a branch instruction from non-PIC code to the PIC one Index: ELF/Thunks.cpp =================================================================== --- ELF/Thunks.cpp +++ ELF/Thunks.cpp @@ -266,11 +266,13 @@ case R_ARM_PC24: case R_ARM_PLT32: case R_ARM_JUMP24: + case R_ARM_CALL: if (Config->Pic) return make>(S); return make>(S); case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: + case R_ARM_THM_CALL: if (Config->Pic) return make>(S); return make>(S); Index: test/ELF/arm-branch-error.s =================================================================== --- test/ELF/arm-branch-error.s +++ /dev/null @@ -1,19 +0,0 @@ -// RUN: llvm-mc -filetype=obj -triple=armv7a-none-linux-gnueabi %s -o %t -// RUN: llvm-mc -filetype=obj -triple=armv7a-none-linux-gnueabi %S/Inputs/far-arm-abs.s -o %tfar -// RUN: not ld.lld %t %tfar -o %t2 2>&1 | FileCheck %s -// REQUIRES: arm - .syntax unified - .section .text, "ax",%progbits - .globl _start - .balign 0x10000 - .type _start,%function -_start: - // address of too_far symbols are just out of range of ARM branch with - // 26-bit immediate field and an addend of -8 - bl too_far1 - b too_far2 - beq too_far3 - -// CHECK: R_ARM_CALL out of range -// CHECK-NEXT: R_ARM_JUMP24 out of range -// CHECK-NEXT: R_ARM_JUMP24 out of range Index: test/ELF/arm-branch-rangethunk.s =================================================================== --- /dev/null +++ test/ELF/arm-branch-rangethunk.s @@ -0,0 +1,34 @@ +// RUN: llvm-mc -filetype=obj -triple=armv7a-none-linux-gnueabi %s -o %t +// RUN: llvm-mc -filetype=obj -triple=armv7a-none-linux-gnueabi 
%S/Inputs/far-arm-abs.s -o %tfar +// RUN: ld.lld %t %tfar -o %t2 2>&1 +// RUN: llvm-objdump -d -triple=armv7a-none-linux-gnueabi %t2 | FileCheck %s +// REQUIRES: arm + .syntax unified + .section .text, "ax",%progbits + .globl _start + .balign 0x10000 + .type _start,%function +_start: + // address of too_far symbols are just out of range of ARM branch with + // 26-bit immediate field and an addend of -8 + bl too_far1 + b too_far2 + beq too_far3 + +// CHECK: Disassembly of section .text: +// CHECK-NEXT: _start: +// CHECK-NEXT: 20000: 01 00 00 eb bl #4 <__ARMv7ABSLongThunk_too_far1> +// CHECK-NEXT: 20004: 03 00 00 ea b #12 <__ARMv7ABSLongThunk_too_far2> +// CHECK-NEXT: 20008: 05 00 00 0a beq #20 <__ARMv7ABSLongThunk_too_far3> +// CHECK: __ARMv7ABSLongThunk_too_far1: +// CHECK-NEXT: 2000c: 08 c0 00 e3 movw r12, #8 +// CHECK-NEXT: 20010: 02 c2 40 e3 movt r12, #514 +// CHECK-NEXT: 20014: 1c ff 2f e1 bx r12 +// CHECK: __ARMv7ABSLongThunk_too_far2: +// CHECK-NEXT: 20018: 0c c0 00 e3 movw r12, #12 +// CHECK-NEXT: 2001c: 02 c2 40 e3 movt r12, #514 +// CHECK-NEXT: 20020: 1c ff 2f e1 bx r12 +// CHECK: __ARMv7ABSLongThunk_too_far3: +// CHECK-NEXT: 20024: 10 c0 00 e3 movw r12, #16 +// CHECK-NEXT: 20028: 02 c2 40 e3 movt r12, #514 +// CHECK-NEXT: 2002c: 1c ff 2f e1 bx r12 Index: test/ELF/arm-thumb-branch-error.s =================================================================== --- test/ELF/arm-thumb-branch-error.s +++ /dev/null @@ -1,19 +0,0 @@ -// RUN: llvm-mc -filetype=obj -triple=thumbv7a-none-linux-gnueabi %s -o %t -// RUN: llvm-mc -filetype=obj -triple=thumbv7a-none-linux-gnueabi %S/Inputs/far-arm-thumb-abs.s -o %tfar -// RUN: not ld.lld %t %tfar -o %t2 2>&1 | FileCheck %s -// REQUIRES: arm - .syntax unified - .section .text, "ax",%progbits - .globl _start - .balign 0x10000 - .type _start,%function -_start: - // address of too_far symbols are just out of range of ARM branch with - // 26-bit immediate field and an addend of -8 - bl too_far1 - b too_far2 - beq.w too_far3 - 
-// CHECK: R_ARM_THM_CALL out of range -// CHECK-NEXT: R_ARM_THM_JUMP24 out of range -// CHECK-NEXT: R_ARM_THM_JUMP19 out of range Index: test/ELF/arm-thumb-branch-rangethunk.s =================================================================== --- /dev/null +++ test/ELF/arm-thumb-branch-rangethunk.s @@ -0,0 +1,36 @@ +// RUN: llvm-mc -filetype=obj -triple=thumbv7a-none-linux-gnueabi %s -o %t +// RUN: llvm-mc -filetype=obj -triple=thumbv7a-none-linux-gnueabi %S/Inputs/far-arm-thumb-abs.s -o %tfar +// RUN: ld.lld %t %tfar -o %t2 2>&1 +// RUN: llvm-objdump -d -triple=thumbv7a-none-linux-gnueabi %t2 | FileCheck %s +// REQUIRES: arm + .syntax unified + .thumb + .section .text, "ax",%progbits + .globl _start + .balign 0x10000 + .type _start,%function +_start: + // addresses of the too_far symbols are just out of range of the Thumb + // bl, b.w and beq.w branch encodings, so each branch needs a range thunk + bl too_far1 + b too_far2 + beq.w too_far3 + +// CHECK: Disassembly of section .text: +// CHECK-NEXT: _start: +// CHECK-NEXT: 20000: 00 f0 04 f8 bl #8 +// CHECK-NEXT: 20004: 00 f0 07 b8 b.w #14 <__Thumbv7ABSLongThunk_too_far2> +// CHECK-NEXT: 20008: 00 f0 0a 80 beq.w #20 <__Thumbv7ABSLongThunk_too_far3> +// CHECK: __Thumbv7ABSLongThunk_too_far1: +// CHECK-NEXT: 2000c: 40 f2 05 0c movw r12, #5 +// CHECK-NEXT: 20010: c0 f2 02 1c movt r12, #258 +// CHECK-NEXT: 20014: 60 47 bx r12 +// CHECK: __Thumbv7ABSLongThunk_too_far2: +// CHECK-NEXT: 20016: 40 f2 09 0c movw r12, #9 +// CHECK-NEXT: 2001a: c0 f2 02 1c movt r12, #258 +// CHECK-NEXT: 2001e: 60 47 bx r12 +// CHECK: __Thumbv7ABSLongThunk_too_far3: +// CHECK-NEXT: 20020: 40 f2 0d 0c movw r12, #13 +// CHECK-NEXT: 20024: c0 f2 12 0c movt r12, #18 +// CHECK-NEXT: 20028: 60 47 bx r12 +// CHECK-NEXT: 2002a: 00 00 movs r0, r0