diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -909,12 +909,22 @@ return false; } - /// Use \p Input1 or Input2 as the current value for the input register and - /// put in \p Output the changes incurred by executing \p Inst. Return false - /// if it was not possible to perform the evaluation. - virtual bool evaluateSimple(const MCInst &Inst, int64_t &Output, - std::pair Input1, - std::pair Input2) const { + /// Use \p Input1 or Input2 as the current value for the input + /// register and put in \p Output the changes incurred by executing + /// \p Inst. Return false if it was not possible to perform the + /// evaluation. evaluateStackOffsetExpr is restricted to operations + /// that have associativity with addition. Its intended usage is for + /// evaluating stack offset changes. In these cases, expressions + /// appear in the form of (x + offset) OP constant, where x is an + /// unknown base (such as stack base) but offset and constant are + /// known. In these cases, \p Output represents the new stack offset + /// after executing \p Inst. Because we don't know x, we can't + /// evaluate operations such as multiply or AND/OR, e.g. (x + + /// offset) OP constant is not the same as x + (offset OP constant). + virtual bool + evaluateStackOffsetExpr(const MCInst &Inst, int64_t &Output, + std::pair Input1, + std::pair Input2) const { llvm_unreachable("not implemented"); return false; } diff --git a/bolt/include/bolt/Passes/StackPointerTracking.h b/bolt/include/bolt/Passes/StackPointerTracking.h --- a/bolt/include/bolt/Passes/StackPointerTracking.h +++ b/bolt/include/bolt/Passes/StackPointerTracking.h @@ -115,7 +115,7 @@ else FP = std::make_pair(0, 0); int64_t Output; - if (!MIB->evaluateSimple(Point, Output, SP, FP)) { + if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) { if (SPVal == EMPTY && FPVal == EMPTY) return SPVal; return SUPERPOSITION; @@ -150,7 +150,7 @@ else SP = std::make_pair(0, 0); int64_t Output; - if (!MIB->evaluateSimple(Point, Output, SP, FP)) { + if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) { if (SPVal == EMPTY && FPVal == EMPTY) return FPVal; return SUPERPOSITION; diff --git a/bolt/lib/Passes/AllocCombiner.cpp b/bolt/lib/Passes/AllocCombiner.cpp --- a/bolt/lib/Passes/AllocCombiner.cpp +++ b/bolt/lib/Passes/AllocCombiner.cpp @@ -29,9 +29,9 @@ bool getStackAdjustmentSize(const BinaryContext &BC, const MCInst &Inst, int64_t &Adjustment) { - return BC.MIB->evaluateSimple(Inst, Adjustment, - std::make_pair(BC.MIB->getStackPointer(), 0LL), - std::make_pair(0, 0LL)); + return BC.MIB->evaluateStackOffsetExpr( + Inst, Adjustment, std::make_pair(BC.MIB->getStackPointer(), 0LL), + std::make_pair(0, 0LL)); } bool isIndifferentToSP(const MCInst &Inst, const BinaryContext &BC) { diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp b/bolt/lib/Passes/ShrinkWrapping.cpp --- a/bolt/lib/Passes/ShrinkWrapping.cpp +++ b/bolt/lib/Passes/ShrinkWrapping.cpp @@ -246,7 +246,7 @@ SP = std::make_pair(0, 0); int64_t Output; - if (!BC.MIB->evaluateSimple(Point, Output, SP, FP)) + if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) return; // Not your regular frame pointer initialization... bail @@ -294,7 +294,7 @@ SP = std::make_pair(0, 0); int64_t Output; - if (!BC.MIB->evaluateSimple(Point, Output, SP, FP)) + if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) return; // If the value is the same of FP, no need to adjust it diff --git a/bolt/lib/Passes/StackAllocationAnalysis.cpp b/bolt/lib/Passes/StackAllocationAnalysis.cpp --- a/bolt/lib/Passes/StackAllocationAnalysis.cpp +++ b/bolt/lib/Passes/StackAllocationAnalysis.cpp @@ -134,7 +134,7 @@ else FP = std::make_pair(0, 0); int64_t Output; - if (!MIB->evaluateSimple(Point, Output, SP, FP)) + if (!MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) return Next; if (SPOffset < Output) { diff --git a/bolt/lib/Passes/ValidateInternalCalls.cpp b/bolt/lib/Passes/ValidateInternalCalls.cpp --- a/bolt/lib/Passes/ValidateInternalCalls.cpp +++ b/bolt/lib/Passes/ValidateInternalCalls.cpp @@ -261,7 +261,7 @@ int64_t Output; std::pair Input1 = std::make_pair(Reg, 0); std::pair Input2 = std::make_pair(0, 0); - if (!BC.MIB->evaluateSimple(Use, Output, Input1, Input2)) { + if (!BC.MIB->evaluateStackOffsetExpr(Use, Output, Input1, Input2)) { LLVM_DEBUG(dbgs() << "Evaluate simple failed.\n"); return false; } diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -1244,9 +1244,10 @@ return false; } - bool evaluateSimple(const MCInst &Inst, int64_t &Output, - std::pair Input1, - std::pair Input2) const override { + bool + evaluateStackOffsetExpr(const MCInst &Inst, int64_t &Output, + std::pair Input1, + std::pair Input2) const override { auto getOperandVal = [&](MCPhysReg Reg) -> ErrorOr { if (Reg == Input1.first) @@ -1260,16 +1261,6 @@ default: return false; - case X86::AND64ri32: - case X86::AND64ri8: - if (!Inst.getOperand(2).isImm()) - return false; - if (ErrorOr InputVal = - getOperandVal(Inst.getOperand(1).getReg())) - Output = *InputVal & Inst.getOperand(2).getImm(); - else - return false; - break; case X86::SUB64ri32: case X86::SUB64ri8: if (!Inst.getOperand(2).isImm()) diff --git a/bolt/test/X86/shrinkwrapping-and-rsp.s b/bolt/test/X86/shrinkwrapping-and-rsp.s new file mode 100644 --- /dev/null +++ b/bolt/test/X86/shrinkwrapping-and-rsp.s @@ -0,0 +1,55 @@ +# This checks that shrink wrapping does attempt at accessing stack elements +# using RSP when the function is aligning RSP and changing offsets. + +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \ +# RUN: %s -o %t.o +# RUN: link_fdata %s %t.o %t.fdata +# RUN: llvm-strip --strip-unneeded %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out -data %t.fdata \ +# RUN: -frame-opt=all -simplify-conditional-tail-calls=false \ +# RUN: -eliminate-unreachable=false | FileCheck %s + +# Here we have a function that aligns the stack at prologue. Stack pointer +# analysis can't try to infer offset positions after AND because that depends +# on the runtime value of the stack pointer of callee (whether it is misaligned +# or not). + .globl _start + .type _start, %function +_start: + .cfi_startproc +# FDATA: 0 [unknown] 0 1 _start 0 0 1 + push %rbp + mov %rsp, %rbp + push %rbx + push %r14 + and $0xffffffffffffffe0,%rsp + subq $0x20, %rsp +b: je hot_path +# FDATA: 1 _start #b# 1 _start #hot_path# 0 1 +cold_path: + mov %r14, %rdi + #mov %rbx, %rdi + # Block push-pop mode by using an instruction that requires the + # stack to be aligned to 128B. This will force the pass + # to try to index stack elements by using RSP +offset directly, but + # we do not know how to access individual elements of the stack thanks + # to the alignment. + movdqa %xmm8, (%rsp) + addq $0x20, %rsp + pop %r14 + pop %rbx + pop %rbp + ret +hot_path: + addq $0x20, %rsp + pop %r14 + pop %rbx + pop %rbp + ret + .cfi_endproc + .size _start, .-_start + +# CHECK: BOLT-INFO: Shrink wrapping moved 0 spills inserting load/stores and 0 spills inserting push/pops