diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -44,6 +44,7 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include #include #include #include @@ -173,8 +174,15 @@ mutable MCSymbol *FunctionConstantIslandLabel{nullptr}; mutable MCSymbol *FunctionColdConstantIslandLabel{nullptr}; + // Constant island alignment value + uint16_t Alignment{0}; // Returns constant island alignment - uint16_t getAlignment() const { return sizeof(uint64_t); } + // The minimum required alignment is 8 bytes + uint16_t getAlignment() const { + return std::max(Alignment, static_cast<uint16_t>(sizeof(uint64_t))); + } + // Set constant island alignment value + void setAlignment(uint16_t Value) { Alignment = Value; } }; static constexpr uint64_t COUNT_NO_PROFILE = @@ -1092,6 +1100,18 @@ uint64_t getOutputSize() const { return OutputSize; } + // Return original alignment value of the function, i.e. the largest power + // of two dividing its input address, clamped to what fits into uint16_t. + // Address 0 satisfies any alignment, so it yields the clamped maximum too. + uint16_t getInputAlignment() const { + constexpr uint16_t MaxAlignment = 1u << 15; + const uint64_t Address = getAddress(); + // Isolate the lowest set bit; the result is 0 only when Address is 0. + const uint64_t LowestSetBit = Address & -Address; + if (!LowestSetBit || LowestSetBit > MaxAlignment) + return MaxAlignment; + return static_cast<uint16_t>(LowestSetBit); + } + /// Does this function have a valid streaming order index? bool hasValidIndex() const { return Index != -1U; } @@ -2058,6 +2078,11 @@ return Islands ? Islands->getAlignment() : 1; } + void setConstantIslandAlignment(uint16_t Alignment) { + assert(Islands && "function expected to have constant islands"); + Islands->setAlignment(Alignment); + } + uint64_t estimateConstantIslandSize(const BinaryFunction *OnBehalfOf = nullptr) const { if (!Islands) diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -491,9 +491,7 @@ // AArch64 requires CI to be aligned to 8 bytes due to access instructions // restrictions. E.g. the ldr with imm, where imm must be aligned to 8 bytes. - const uint16_t Alignment = OnBehalfOf - ? 
OnBehalfOf->getConstantIslandAlignment() - : BF.getConstantIslandAlignment(); + const uint16_t Alignment = BF.getConstantIslandAlignment(); Streamer.emitCodeAlignment(Alignment, &*BC.STI); if (!OnBehalfOf) { diff --git a/bolt/lib/Passes/Aligner.cpp b/bolt/lib/Passes/Aligner.cpp --- a/bolt/lib/Passes/Aligner.cpp +++ b/bolt/lib/Passes/Aligner.cpp @@ -166,10 +166,11 @@ else alignMaxBytes(BF); - // Align objects that contains constant islands and no code - // to at least 8 bytes. + // Preserve initial alignment of the object that contains islands + // and no code. if (!BF.size() && BF.hasIslandsInfo()) { - const uint16_t Alignment = BF.getConstantIslandAlignment(); + const uint16_t Alignment = BF.getInputAlignment(); + BF.setConstantIslandAlignment(Alignment); if (BF.getAlignment() < Alignment) BF.setAlignment(Alignment); diff --git a/bolt/test/AArch64/object-in-code-alignment.s b/bolt/test/AArch64/object-in-code-alignment.s new file mode 100644 --- /dev/null +++ b/bolt/test/AArch64/object-in-code-alignment.s @@ -0,0 +1,37 @@ +// This test checks that the input alignment of an object in text is preserved. +// This is needed for the cases like KeccakF1600_int in openssl, where the loop +// exits when the address of the object entry is aligned on 512 bytes, i.e. +// the object is aligned on 512 and has a size of 512 bytes. 
+ +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -fPIC -pie -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.bolt -use-old-text=0 -lite=0 +# RUN: llvm-objdump -d -j .text %t.bolt | FileCheck %s + +.text +.align 2 +.global dummy +.type dummy, %object +dummy: + .word 255 +.size dummy, .-dummy + +# CHECK-DAG: {{.*}}00 <table>: +.align 8 +.global table +.type table, %object +table: + .xword 0xdeadbeef +.size table, .-table + +.align 2 +.global _start +.type _start, %function +_start: + ldr x2, table + mov x0, #0 + ret +.Lci: + .word 0 +.size _start, .-_start +# CHECK-DAG: {{.*}}00: {{.*}} 0xdeadbeef