diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -44,6 +44,7 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include #include #include #include @@ -173,8 +174,15 @@ mutable MCSymbol *FunctionConstantIslandLabel{nullptr}; mutable MCSymbol *FunctionColdConstantIslandLabel{nullptr}; + // Constant island alignment value + uint16_t Alignment{0}; // Returns constant island alignment - uint16_t getAlignment() const { return sizeof(uint64_t); } + // The minimum required alignment is 8 bytes + uint16_t getAlignment() const { + return std::max(Alignment, static_cast<uint16_t>(sizeof(uint64_t))); + } + // Set constant island alignment value + void setAlignment(uint16_t Value) { Alignment = Value; } }; static constexpr uint64_t COUNT_NO_PROFILE = @@ -1092,6 +1100,18 @@ uint64_t getOutputSize() const { return OutputSize; } + // Return the original alignment of the function guessed from its address: + // the largest power of two that divides the address, capped at 32768 so that + // the result fits in uint16_t. Returns 1 if the address is 0. + uint16_t guessInputAlignment() const { + const uint64_t Address = getAddress(); + if (!Address) + return 1; + // Isolate the lowest set bit in 64-bit arithmetic to avoid shift overflow. + const uint64_t LowestSetBit = Address & (~Address + 1); + return static_cast<uint16_t>(std::min<uint64_t>(LowestSetBit, 1ULL << 15)); + } + /// Does this function have a valid streaming order index? bool hasValidIndex() const { return Index != -1U; } @@ -2058,6 +2078,12 @@ return Islands ? Islands->getAlignment() : 1; } + /// Set the alignment of the constant islands; requires islands to exist. + void setConstantIslandAlignment(uint16_t Alignment) { + assert(Islands && "function expected to have constant islands"); + Islands->setAlignment(Alignment); + } + uint64_t estimateConstantIslandSize(const BinaryFunction *OnBehalfOf = nullptr) const { if (!Islands) diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -491,9 +491,7 @@ // AArch64 requires CI to be aligned to 8 bytes due to access instructions // restrictions. E.g. the ldr with imm, where imm must be aligned to 8 bytes. - const uint16_t Alignment = OnBehalfOf - ? 
OnBehalfOf->getConstantIslandAlignment() - : BF.getConstantIslandAlignment(); + const uint16_t Alignment = BF.getConstantIslandAlignment(); Streamer.emitCodeAlignment(Alignment, &*BC.STI); if (!OnBehalfOf) { diff --git a/bolt/lib/Passes/Aligner.cpp b/bolt/lib/Passes/Aligner.cpp --- a/bolt/lib/Passes/Aligner.cpp +++ b/bolt/lib/Passes/Aligner.cpp @@ -47,6 +47,12 @@ cl::desc("maximum number of bytes to use to align functions"), cl::init(32), cl::cat(BoltOptCategory)); +cl::opt AlignCIMaxBytes( + "align-ci-max-bytes", + cl::desc("maximum number of bytes to use to align constant islands or " + "in text objects"), + cl::init(512), cl::cat(BoltOptCategory)); + cl::opt BlockAlignment("block-alignment", cl::desc("boundary to use for alignment of basic blocks"), @@ -166,10 +172,19 @@ else alignMaxBytes(BF); - // Align objects that contains constant islands and no code - // to at least 8 bytes. + // Preserve initial alignment of the object that contains islands + // and no code. if (!BF.size() && BF.hasIslandsInfo()) { - const uint16_t Alignment = BF.getConstantIslandAlignment(); + uint16_t Alignment = BF.guessInputAlignment(); + if (Alignment > opts::AlignCIMaxBytes) { + outs() << "BOLT-WARNING: input alignment of text object " << BF << " (" + << Alignment << " bytes) " + << "is more then AlignCIMaxBytes. Setting alignment to " + << opts::AlignCIMaxBytes << " bytes.\n"; + Alignment = opts::AlignCIMaxBytes; + } + + BF.setConstantIslandAlignment(Alignment); if (BF.getAlignment() < Alignment) BF.setAlignment(Alignment); diff --git a/bolt/test/AArch64/object-in-code-alignment.s b/bolt/test/AArch64/object-in-code-alignment.s new file mode 100644 --- /dev/null +++ b/bolt/test/AArch64/object-in-code-alignment.s @@ -0,0 +1,42 @@ +// This test checks that the initial object in text alignment is preserved. +// This is needed for the cases like KeccakF1600_int in openssl, where the loop +// terminates when the address of the object entry is aligned on 512 bytes, i.e. 
+// the object is aligned on 512 bytes and has the size of 512 bytes. + +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -fPIC -pie -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.bolt -use-old-text=0 -lite=0 +# RUN: llvm-objdump -d -j .text %t.bolt | FileCheck %s +# RUN: llvm-bolt %t.exe -o /dev/null -use-old-text=0 -lite=0 --no-threads \ +# RUN: -align-ci-max-bytes=64 | FileCheck -check-prefix=CHECKWARN %s + +# CHECKWARN: input alignment of text object table (128 bytes) is more +# CHECKWARN-SAME: then AlignCIMaxBytes. Setting alignment to 64 bytes. + +.text +.align 8 +.global dummy +.type dummy, %object +dummy: + .word 255 +.size dummy, .-dummy + +# CHECK-DAG: {{.*}}{{0|8}}0 : +.align 7 +.global table +.type table, %object +table: + .xword 0xdeadbeef +.size table, .-table + +.align 2 +.global _start +.type _start, %function +_start: + ldr x2, table + mov x0, #0 + ret +.Lci: + .word 0 +.size _start, .-_start +# CHECK-DAG: {{.*}}{{0|8}}0: {{.*}} 0xdeadbeef