Index: ELF/LinkerScript.h =================================================================== --- ELF/LinkerScript.h +++ ELF/LinkerScript.h @@ -15,6 +15,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/StringSaver.h" namespace lld { namespace elf { @@ -23,6 +24,46 @@ template class InputSectionBase; template class OutputSectionBase; +class ExpressionEvaluator { + typedef uint64_t (ExpressionEvaluator::*OpHandler)(StringRef Arg1, + StringRef Arg2); + +public: + ExpressionEvaluator(llvm::BumpPtrAllocator *A) : Saver(*A) {} + + // Evaluates the expression given by list of tokens. + uint64_t evaluate(std::vector &Tokens, uint64_t LocCounter); + +protected: + uint64_t OpPlus(StringRef Arg1, StringRef Arg2); + uint64_t OpMinus(StringRef Arg1, StringRef Arg2); + uint64_t OpDiv(StringRef Arg1, StringRef Arg2); + uint64_t OpMul(StringRef Arg1, StringRef Arg2); + uint64_t OpAnd(StringRef Arg1, StringRef Arg2); + + uint64_t getValue(StringRef V); + + // Builds Reverse Polish Notation using + // shunting-yard algorithm by Edsger Dijkstra. + // https://en.wikipedia.org/wiki/Shunting-yard_algorithm + std::vector buildRPN(std::vector &Tokens); + + bool checkBracketsConsistency(std::vector &Tokens); + + bool isOperator(StringRef O) { return Operators.find(O) != Operators.end(); } + uint8_t getPriority(StringRef O) { return Operators.find(O)->second.Rank; } + + struct Operator { + uint8_t Rank; + OpHandler Handler; + }; + const static llvm::StringMap Operators; + + uint64_t LocCounter; + + llvm::StringSaver Saver; +}; + // This class represents each rule in SECTIONS command. class SectionRule { public: @@ -89,6 +130,8 @@ // Used to assign addresses to sections. std::vector Locations; + std::unique_ptr Eval; + llvm::BumpPtrAllocator Alloc; }; Index: ELF/LinkerScript.cpp =================================================================== --- ELF/LinkerScript.cpp +++ ELF/LinkerScript.cpp @@ -24,7 +24,6 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/StringSaver.h" using namespace llvm; using namespace llvm::ELF; @@ -43,27 +42,156 @@ return V; } -// Evaluates the expression given by list of tokens. -uint64_t LinkerScript::evaluate(std::vector &Tokens, - uint64_t LocCounter) { - uint64_t Result = 0; - for (size_t I = 0, E = Tokens.size(); I < E; ++I) { - // Each second token should be '+' as this is the - // only operator we support now. - if (I % 2 == 1) { - if (Tokens[I] == "+") - continue; - error("error in location counter expression"); +uint64_t ExpressionEvaluator::evaluate(std::vector &Tokens, + uint64_t LocCounter) { + this->LocCounter = LocCounter; + + std::vector RPN = buildRPN(Tokens); + if (HasError) + return 0; + + std::vector Stack; + for (StringRef Op : RPN) { + if (!isOperator(Op)) { + Stack.push_back(Op); + continue; + } + + StringRef Arg2 = *std::prev(Stack.end(), 1); + StringRef Arg1 = *std::prev(Stack.end(), 2); + Stack.erase(std::prev(Stack.end(), 2), Stack.end()); + + auto I = Operators.find(Op); + if (I == Operators.end() || !I->second.Handler) + fatal("wrong or unknown operator in RPN expression: " + Op); + + uint64_t Val = (this->*I->second.Handler)(Arg1, Arg2); + Stack.push_back(Saver.save(std::to_string(Val).c_str())); + + if (HasError) return 0; + } + + assert(Stack.size() == 1); + return getValue(Stack.back()); +} + +uint64_t ExpressionEvaluator::OpPlus(StringRef Arg1, StringRef Arg2) { + return getValue(Arg1) + getValue(Arg2); +} + +uint64_t ExpressionEvaluator::OpMinus(StringRef Arg1, StringRef Arg2) { + return getValue(Arg1) - getValue(Arg2); +} + +uint64_t ExpressionEvaluator::OpDiv(StringRef Arg1, StringRef Arg2) { + uint64_t Val = getValue(Arg2); + if (!Val) { + error("division by zero in expression"); + return 0; + } + return getValue(Arg1) / Val; +} + +uint64_t ExpressionEvaluator::OpMul(StringRef Arg1, StringRef Arg2) { + return getValue(Arg1) * getValue(Arg2); +} + +uint64_t ExpressionEvaluator::OpAnd(StringRef Arg1, StringRef Arg2) { + return getValue(Arg1) & getValue(Arg2); +} + +uint64_t ExpressionEvaluator::getValue(StringRef V) { + if (V == ".") + return LocCounter; + return getInteger(V); +} + +std::vector +ExpressionEvaluator::buildRPN(std::vector &Tokens) { + std::vector OutString; + std::list Stack; + + if (!checkBracketsConsistency(Tokens)) { + error("inconsistent opening/closing bracket"); + return {}; + } + + for (StringRef Tok : Tokens) { + // If the token is a number, then add it to the output queue. + if (!isOperator(Tok)) { + OutString.push_back(Tok); + continue; } - StringRef Tok = Tokens[I]; - if (Tok == ".") - Result += LocCounter; - else - Result += getInteger(Tok); + // If the token is a left parenthesis, then push it onto the stack. + if (Tok == "(") { + Stack.push_back(Tok); + continue; + } + + // If the token is a right parenthesis: + // Until the token at the top of the stack is a left parenthesis, pop + // operators off the stack onto the output queue. Pop the left parenthesis + // from the stack, but not onto the output queue. + if (Tok == ")") { + while (!Stack.empty()) { + StringRef StackTok = Stack.back(); + Stack.pop_back(); + if (StackTok == "(") + break; + OutString.push_back(StackTok); + } + continue; + } + + // If the token is an operator, Tok, then: + // while there is an operator token St, at the top of the operator stack + // and Tok precedence is less than or equal to that of St, + // then pop StackTok off the operator stack, onto the output queue. + // At the end of iteration push Tok onto the operator stack. + while (!Stack.empty()) { + StringRef St = Stack.back(); + if (getPriority(Tok) > getPriority(St)) + break; + OutString.push_back(St); + Stack.pop_back(); + } + Stack.push_back(Tok); + } + + // Add the remaining stack content to output string. + OutString.insert(OutString.end(), Stack.rbegin(), Stack.rend()); + return OutString; +} + +bool ExpressionEvaluator::checkBracketsConsistency(std::vector &E) { + int32_t Open = 0; + for (StringRef Tok : E) { + if (Tok == "(") + ++Open; + else if (Tok == ")") + --Open; + if (Open < 0) + return false; } - return Result; + return Open == 0; +} + +const StringMap ExpressionEvaluator::Operators = + {{"*", {80, &ExpressionEvaluator::OpMul}}, + {"/", {80, &ExpressionEvaluator::OpDiv}}, + {"+", {50, &ExpressionEvaluator::OpPlus}}, + {"-", {50, &ExpressionEvaluator::OpMinus}}, + {"&", {30, &ExpressionEvaluator::OpAnd}}, + {"(", {20, nullptr}}, + {")", {20, nullptr}}}; + +uint64_t LinkerScript::evaluate(std::vector &Tokens, + uint64_t LocCounter) { + if (!Eval) + Eval.reset(new ExpressionEvaluator(&Alloc)); + return Eval->evaluate(Tokens, LocCounter); } template Index: test/ELF/linkerscript-locationcounter.s =================================================================== --- test/ELF/linkerscript-locationcounter.s +++ test/ELF/linkerscript-locationcounter.s @@ -1,63 +1,115 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t # RUN: echo "SECTIONS { \ -# RUN: . = 0x12341; \ -# RUN: .data : { *(.data) } \ -# RUN: . = . + 0x10000; \ -# RUN: .text : { *(.text) } \ +# RUN: . = 0xFFF0; \ +# RUN: . = . + 0x10; \ +# RUN: .plus : { *(.plus) } \ +# RUN: . = 0x11010 - 0x10; \ +# RUN: .minus : { *(.minus) } \ +# RUN: . = 0x24000 / 0x2; \ +# RUN: .div : { *(.div) } \ +# RUN: . = 0x11000 + 0x1000 * 0x2; \ +# RUN: .mul : { *(.mul) } \ +# RUN: . = 0x10000 + (0x1000 + 0x1000) * 0x2; \ +# RUN: .bracket : { *(.bracket) } \ +# RUN: . = 0x17000 & 0x15000; \ +# RUN: .and : { *(.and) } \ # RUN: }" > %t.script - # RUN: ld.lld %t --script %t.script -o %t2 # RUN: llvm-readobj -s %t2 | FileCheck %s -# CHECK: Sections [ -# CHECK-NEXT: Section { -# CHECK-NEXT: Index: 0 -# CHECK-NEXT: Name: (0) -# CHECK-NEXT: Type: SHT_NULL -# CHECK-NEXT: Flags [ -# CHECK-NEXT: ] -# CHECK-NEXT: Address: 0x0 -# CHECK-NEXT: Offset: 0x0 -# CHECK-NEXT: Size: 0 -# CHECK-NEXT: Link: 0 -# CHECK-NEXT: Info: 0 -# CHECK-NEXT: AddressAlignment: 0 -# CHECK-NEXT: EntrySize: 0 -# CHECK-NEXT: } -# CHECK-NEXT: Section { -# CHECK-NEXT: Index: 1 -# CHECK-NEXT: Name: .data -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK-NEXT: Flags [ -# CHECK-NEXT: SHF_ALLOC -# CHECK-NEXT: SHF_WRITE -# CHECK-NEXT: ] -# CHECK-NEXT: Address: 0x12341 -# CHECK-NEXT: Offset: 0x158 -# CHECK-NEXT: Size: 8 -# CHECK-NEXT: Link: 0 -# CHECK-NEXT: Info: 0 -# CHECK-NEXT: AddressAlignment: 1 -# CHECK-NEXT: EntrySize: 0 -# CHECK-NEXT: } -# CHECK-NEXT: Section { -# CHECK-NEXT: Index: 2 -# CHECK-NEXT: Name: .text -# CHECK-NEXT: Type: SHT_PROGBITS -# CHECK-NEXT: Flags [ -# CHECK-NEXT: SHF_ALLOC -# CHECK-NEXT: SHF_EXECINSTR -# CHECK-NEXT: ] -# CHECK-NEXT: Address: 0x2234C -# CHECK-NEXT: Offset: 0x160 -# CHECK-NEXT: Size: 1 -# CHECK-NEXT: Link: 0 -# CHECK-NEXT: Info: 0 -# CHECK-NEXT: AddressAlignment: 4 -# CHECK-NEXT: EntrySize: 0 -# CHECK-NEXT: } +# CHECK: Section { +# CHECK: Index: 1 +# CHECK: Name: .plus +# CHECK-NEXT: Type: SHT_PROGBITS +# CHECK-NEXT: Flags [ +# CHECK-NEXT: SHF_ALLOC +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x10000 +# CHECK-NEXT: Offset: +# CHECK-NEXT: Size: +# CHECK-NEXT: Link: +# CHECK-NEXT: Info: +# CHECK-NEXT: AddressAlignment: +# CHECK-NEXT: EntrySize: +# CHECK-NEXT: } +# CHECK-NEXT: Section { +# CHECK-NEXT: Index: 2 +# CHECK-NEXT: Name: .minus +# CHECK-NEXT: Type: SHT_PROGBITS +# CHECK-NEXT: Flags [ +# CHECK-NEXT: SHF_ALLOC +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x11000 +# CHECK-NEXT: Offset: +# CHECK-NEXT: Size: +# CHECK-NEXT: Link: +# CHECK-NEXT: Info: +# CHECK-NEXT: AddressAlignment: +# CHECK-NEXT: EntrySize: +# CHECK-NEXT: } +# CHECK-NEXT: Section { +# CHECK-NEXT: Index: 3 +# CHECK-NEXT: Name: .div +# CHECK-NEXT: Type: SHT_PROGBITS +# CHECK-NEXT: Flags [ +# CHECK-NEXT: SHF_ALLOC +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x12000 +# CHECK-NEXT: Offset: +# CHECK-NEXT: Size: +# CHECK-NEXT: Link: +# CHECK-NEXT: Info: +# CHECK-NEXT: AddressAlignment: +# CHECK-NEXT: EntrySize: +# CHECK-NEXT: } +# CHECK-NEXT: Section { +# CHECK-NEXT: Index: 4 +# CHECK-NEXT: Name: .mul +# CHECK-NEXT: Type: SHT_PROGBITS +# CHECK-NEXT: Flags [ +# CHECK-NEXT: SHF_ALLOC +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x13000 +# CHECK-NEXT: Offset: +# CHECK-NEXT: Size: +# CHECK-NEXT: Link: +# CHECK-NEXT: Info: +# CHECK-NEXT: AddressAlignment: +# CHECK-NEXT: EntrySize: +# CHECK-NEXT: } +# CHECK-NEXT: Section { +# CHECK-NEXT: Index: 5 +# CHECK-NEXT: Name: .bracket +# CHECK-NEXT: Type: SHT_PROGBITS +# CHECK-NEXT: Flags [ +# CHECK-NEXT: SHF_ALLOC +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x14000 +# CHECK-NEXT: Offset: +# CHECK-NEXT: Size: +# CHECK-NEXT: Link: +# CHECK-NEXT: Info: +# CHECK-NEXT: AddressAlignment: +# CHECK-NEXT: EntrySize: +# CHECK-NEXT: } +# CHECK-NEXT: Section { +# CHECK-NEXT: Index: +# CHECK-NEXT: Name: .and +# CHECK-NEXT: Type: SHT_PROGBITS +# CHECK-NEXT: Flags [ +# CHECK-NEXT: SHF_ALLOC +# CHECK-NEXT: ] +# CHECK-NEXT: Address: 0x15000 +# CHECK-NEXT: Offset: +# CHECK-NEXT: Size: +# CHECK-NEXT: Link: +# CHECK-NEXT: Info: +# CHECK-NEXT: AddressAlignment: +# CHECK-NEXT: EntrySize: +# CHECK-NEXT: } +## Mailformed number error. # RUN: echo "SECTIONS { \ # RUN: . = 0x12Q41; \ # RUN: }" > %t.script @@ -65,10 +117,55 @@ # RUN: FileCheck --check-prefix=NUMERR %s # NUMERR: malformed number: 0x12Q41 +## Missing closing bracket. +# RUN: echo "SECTIONS { \ +# RUN: . = 0x10000 + (0x1000 + 0x1000 * 0x2; \ +# RUN: }" > %t.script +# RUN: not ld.lld %t --script %t.script -o %t2 2>&1 | \ +# RUN: FileCheck --check-prefix=BRACKETERR %s +# BRACKETERR: inconsistent opening/closing bracket + +## Missing opening bracket. +# RUN: echo "SECTIONS { \ +# RUN: . = 0x10000 + 0x1000 + 0x1000) * 0x2; \ +# RUN: }" > %t.script +# RUN: not ld.lld %t --script %t.script -o %t2 2>&1 | \ +# RUN: FileCheck --check-prefix=BRACKETERR %s + +## Empty expression. +# RUN: echo "SECTIONS { \ +# RUN: . = ; \ +# RUN: }" > %t.script +# RUN: not ld.lld %t --script %t.script -o %t2 2>&1 | \ +# RUN: FileCheck --check-prefix=ERREXPR %s +# ERREXPR: error in location counter expression + +## Div by zero error. +# RUN: echo "SECTIONS { \ +# RUN: . = 0x10000 / 0x0; \ +# RUN: }" > %t.script +# RUN: not ld.lld %t --script %t.script -o %t2 2>&1 | \ +# RUN: FileCheck --check-prefix=DIVZERO %s +# DIVZERO: division by zero in expression + .globl _start; _start: nop -.section .data +.section .plus, "a" .quad 0 +.section .minus, "a" +.quad 0 + +.section .div, "a" +.quad 0 + +.section .mul, "a" +.quad 0 + +.section .bracket, "a" +.quad 0 + +.section .and, "a" +.quad 0