Index: lib/Target/WebAssembly/WebAssemblyFrameLowering.h
===================================================================
--- lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -46,6 +46,7 @@
   bool hasReservedCallFrame(const MachineFunction &MF) const override;

 private:
+  bool hasBP(const MachineFunction &MF) const;
   bool needsSP(const MachineFunction &MF, const MachineFrameInfo &MFI) const;
   bool needsSPWriteback(const MachineFunction &MF,
                         const MachineFrameInfo &MFI) const;
Index: lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -37,15 +37,34 @@
 // TODO: wasm64
 // TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions

+/// We need a base pointer in the case of having overaligned items on the stack.
+/// Because we need to shift the stack pointer by some unknown amount to force
+/// the alignment, we need to record the value of the stack pointer on entry to
+/// the function.
+bool WebAssemblyFrameLowering::hasBP(
+    const MachineFunction &MF) const {
+  const auto *RegInfo =
+      MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
+  return RegInfo->needsStackRealignment(MF);
+}
+
 /// Return true if the specified function should have a dedicated frame pointer
 /// register.
 bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  const auto *RegInfo =
-      MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
-  return MFI.isFrameAddressTaken() || MFI.hasVarSizedObjects() ||
-         MFI.hasStackMap() || MFI.hasPatchPoint() ||
-         RegInfo->needsStackRealignment(MF);
+
+  // When we have var-sized objects, we move the stack pointer by an unknown
+  // amount, and need to emit a frame pointer to restore the stack to where we
+  // were on function entry.
+  // If we already need a base pointer, we use that to fix up the stack pointer.
+  // If there are no fixed-size objects, we would have no use of a frame
+  // pointer, and thus should not emit one.
+  bool HasFixedSizedObjects = MFI.getStackSize() > 0;
+  bool NeedsFixedReference = !hasBP(MF) || HasFixedSizedObjects;
+
+  return MFI.isFrameAddressTaken() ||
+         (MFI.hasVarSizedObjects() && NeedsFixedReference) ||
+         MFI.hasStackMap() || MFI.hasPatchPoint();
 }

 /// Under normal circumstances, when a frame pointer is not required, we reserve
@@ -107,7 +126,7 @@
 WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
     MachineFunction &MF, MachineBasicBlock &MBB,
     MachineBasicBlock::iterator I) const {
-  assert(!I->getOperand(0).getImm() && hasFP(MF) &&
+  assert(!I->getOperand(0).getImm() && (hasFP(MF) || hasBP(MF)) &&
          "Call frame pseudos should only be used for dynamic stack adjustment");
   const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
   if (I->getOpcode() == TII->getCallFrameDestroyOpcode() &&
@@ -153,6 +172,14 @@
       .addReg(Zero)        // addr
       .addMemOperand(LoadMMO);

+  bool HasBP = hasBP(MF);
+  if (HasBP) {
+    auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
+    unsigned BasePtr = MRI.createVirtualRegister(PtrRC);
+    FI->setBasePointerVreg(BasePtr);
+    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), BasePtr)
+        .addReg(StackSize ? SPReg : WebAssembly::SP32);
+  }
   if (StackSize) {
     // Subtract the frame size
     unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
@@ -163,6 +190,17 @@
         .addReg(SPReg)
         .addReg(OffsetReg);
   }
+  if (HasBP) {
+    unsigned BitmaskReg = MRI.createVirtualRegister(PtrRC);
+    unsigned Alignment = MFI.getMaxAlignment();
+    assert((1 << countTrailingZeros(Alignment)) == Alignment &&
+           "Alignment must be a power of 2");
+    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg)
+        .addImm((int)~(Alignment - 1));
+    BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32), WebAssembly::SP32)
+        .addReg(WebAssembly::SP32)
+        .addReg(BitmaskReg);
+  }
   if (hasFP(MF)) {
     // Unlike most conventional targets (where FP points to the saved FP),
     // FP points to the bottom of the fixed-size locals, so we can use positive
@@ -193,7 +231,10 @@
   // subtracted in the prolog.
   unsigned SPReg = 0;
   MachineBasicBlock::iterator InsertAddr = InsertPt;
-  if (StackSize) {
+  if (hasBP(MF)) {
+    auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
+    SPReg = FI->getBasePointerVreg();
+  } else if (StackSize) {
     const TargetRegisterClass *PtrRC =
         MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
     unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
Index: lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
===================================================================
--- lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -46,6 +46,8 @@
   // TLI::LowerVASTART
   unsigned VarargVreg = -1U;

+  unsigned BasePtrVreg = -1U;
+
 public:
   explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
   ~WebAssemblyFunctionInfo() override;
@@ -65,6 +67,12 @@
   }
   void setVarargBufferVreg(unsigned Reg) { VarargVreg = Reg; }

+  unsigned getBasePointerVreg() const {
+    assert(BasePtrVreg != -1U && "Base ptr vreg hasn't been set");
+    return BasePtrVreg;
+  }
+  void setBasePointerVreg(unsigned Reg) { BasePtrVreg = Reg; }
+
   static const unsigned UnusedReg = -1u;

   void stackifyVReg(unsigned VReg) {
Index: test/CodeGen/WebAssembly/alignment.ll
===================================================================
--- /dev/null
+++ test/CodeGen/WebAssembly/alignment.ll
@@ -0,0 +1,134 @@
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare void @somefunc(i32*)
+
+; CHECK-LABEL: underalign:
+; CHECK: i32.load $push[[L1:.+]]=, __stack_pointer{{.+}}
+; CHECK: i32.sub $push[[L10:.+]]=, $pop[[L1]], $pop{{.+}}
+; CHECK-NEXT: tee_local $push{{.+}}=, $[[SP:.+]]=, $pop[[L10]]
+
+; CHECK: i32.add $push[[underaligned:.+]]=, $[[SP]], $pop{{.+}}
+; CHECK-NEXT: call somefunc@FUNCTION, $pop[[underaligned]]
+
+; CHECK: i32.add $push[[L5:.+]]=, $[[SP]], $pop{{.+}}
+; CHECK-NEXT: i32.store __stack_pointer($pop{{.+}}), $pop[[L5]]
+define void @underalign() {
+entry:
+  %underaligned = alloca i32, align 8
+  call void @somefunc(i32* %underaligned)
+  ret void
+}
+
+; CHECK-LABEL: overalign:
+; CHECK: i32.load $push[[L10:.+]]=, __stack_pointer
+; CHECK-NEXT: tee_local $push[[L9:.+]]=, $[[BP:.+]]=, $pop[[L10]]
+; CHECK: i32.sub $push[[L8:.+]]=, $pop[[L9]], $pop{{.+}}
+; CHECK: i32.and $push[[L7:.+]]=, $pop[[L8]], $pop{{.+}}
+; CHECK-NEXT: tee_local $push{{.+}}=, $[[SP:.+]]=, $pop[[L7]]
+
+; CHECK: call somefunc@FUNCTION, $[[SP]]
+
+; CHECK: copy_local $push[[L5:.+]]=, $[[BP]]
+; CHECK-NEXT: i32.store __stack_pointer($pop{{.+}}), $pop[[L5]]
+define void @overalign() {
+entry:
+  %overaligned = alloca i32, align 32
+  call void @somefunc(i32* %overaligned)
+  ret void
+}
+
+; CHECK-LABEL: over_and_normal_align:
+; CHECK: i32.load $push[[L14:.+]]=, __stack_pointer
+; CHECK-NEXT: tee_local $push[[L13:.+]]=, $[[BP:.+]]=, $pop[[L14]]
+; CHECK: i32.sub $push[[L12:.+]]=, $pop[[L13]], $pop{{.+}}
+; CHECK: i32.and $push[[L11:.+]]=, $pop[[L12]], $pop{{.+}}
+; CHECK-NEXT: tee_local $push{{.+}}=, $[[SP]]=, $pop[[L11]]
+
+; CHECK: i32.add $push[[L6:.+]]=, $[[SP]], $pop{{.+}}
+; CHECK-NEXT: call somefunc@FUNCTION, $pop[[L6]]
+; CHECK: i32.add $push[[L8:.+]]=, $[[SP]], $pop{{.+}}
+; CHECK-NEXT: call somefunc@FUNCTION, $pop[[L8]]
+
+; CHECK: copy_local $push[[L9:.+]]=, $[[BP]]
+; CHECK-NEXT: i32.store __stack_pointer({{.+}}), $pop[[L9]]
+define void @over_and_normal_align() {
+entry:
+  %over = alloca i32, align 32
+  %normal = alloca i32
+  call void @somefunc(i32* %over)
+  call void @somefunc(i32* %normal)
+  ret void
+}
+
+; CHECK-LABEL: dynamic_overalign:
+; CHECK: i32.load $push[[L18:.+]]=, __stack_pointer
+; CHECK-NEXT: tee_local $push[[L17:.+]]=, $[[SP:.+]]=, $pop[[L18]]
+; CHECK-NEXT: copy_local $[[BP:.+]]=, $pop[[L17]]
+; CHECK: tee_local $push{{.+}}=, $[[SP_2:.+]]=, $pop{{.+}}
+
+; CHECK: call somefunc@FUNCTION, $[[SP_2]]
+
+; CHECK: i32.store __stack_pointer($pop{{.+}}), $[[BP]]
+define void @dynamic_overalign(i32 %num) {
+entry:
+  %dynamic = alloca i32, i32 %num, align 32
+  call void @somefunc(i32* %dynamic)
+  ret void
+}
+
+; CHECK-LABEL: overalign_and_dynamic:
+; CHECK: i32.load $push[[L21:.+]]=, __stack_pointer
+; CHECK-NEXT: tee_local $push[[L20:.+]]=, $[[BP:.+]]=, $pop[[L21]]
+; CHECK: i32.sub $push[[L19:.+]]=, $pop[[L20]], $pop{{.+}}
+; CHECK: i32.and $push[[L18:.+]]=, $pop[[L19]], $pop{{.+}}
+; CHECK: tee_local $push{{.+}}=, $[[FP:.+]]=, $pop[[L18]]
+; CHECK: i32.sub $push[[L16:.+]]=, $[[FP]], $pop{{.+}}
+; CHECK-NEXT: tee_local $push{{.+}}=, $[[SP:.+]]=, $pop[[L16]]
+
+; CHECK: copy_local $push[[over:.+]]=, $[[FP]]
+; CHECK-NEXT: call somefunc@FUNCTION, $pop[[over]]
+; CHECK-NEXT: call somefunc@FUNCTION, $[[SP]]
+
+; CHECK: copy_local $push[[L12:.+]]=, $[[BP]]
+; CHECK-NEXT: i32.store __stack_pointer($pop{{.+}}), $pop[[L12]]
+define void @overalign_and_dynamic(i32 %num) {
+entry:
+  %over = alloca i32, align 32
+  %dynamic = alloca i32, i32 %num
+  call void @somefunc(i32* %over)
+  call void @somefunc(i32* %dynamic)
+  ret void
+}
+
+; CHECK-LABEL: overalign_static_and_dynamic:
+; CHECK: i32.load $push[[L26:.+]]=, __stack_pointer
+; CHECK-NEXT: tee_local $push[[L25:.+]]=, $[[BP:.+]]=, $pop[[L26]]
+; CHECK: i32.sub $push[[L24:.+]]=, $pop[[L25]], $pop{{.+}}
+; CHECK: i32.and $push[[L23:.+]]=, $pop[[L24]], $pop{{.+}}
+; CHECK: tee_local $push{{.+}}=, $[[FP:.+]]=, $pop[[L23]]
+; CHECK: i32.sub $push[[L21:.+]]=, $[[FP]], $pop{{.+}}
+; CHECK-NEXT: tee_local $push{{.+}}=, $[[SP:.+]]=, $pop[[L21]]
+
+; CHECK: copy_local $push[[L19:.+]]=, $[[FP]]
+; CHECK: tee_local $push[[L18:.+]]=, $[[FP_2:.+]]=, $pop[[L19]]
+; CHECK: i32.add $push[[over:.+]]=, $pop[[L18]], $pop{{.+}}
+; CHECK-NEXT: call somefunc@FUNCTION, $pop[[over]]
+; CHECK: call somefunc@FUNCTION, $[[SP]]
+; CHECK: i32.add $push[[static:.+]]=, $[[FP_2]], $pop{{.+}}
+; CHECK-NEXT: call somefunc@FUNCTION, $pop[[static]]
+
+; CHECK: copy_local $push[[L16:.+]]=, $[[BP]]
+; CHECK-NEXT: i32.store __stack_pointer({{.+}}), $pop[[L16]]
define void @overalign_static_and_dynamic(i32 %num) {
entry:
  %over = alloca i32, align 32
  %dynamic = alloca i32, i32 %num
  %static = alloca i32
  call void @somefunc(i32* %over)
  call void @somefunc(i32* %dynamic)
  call void @somefunc(i32* %static)
  ret void
}
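
Note (illustration, not part of the patch): when hasBP() is true, the prologue saves the incoming stack pointer into the base-pointer vreg, subtracts the fixed frame size, and then ANDs the stack pointer with ~(Alignment - 1) to round it down to the required boundary. A minimal standalone C++ sketch of that arithmetic, using hypothetical values for SP, StackSize, and Alignment:

// Illustration only: the realignment math the prologue emits when hasBP().
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t SP = 0x1000;     // incoming __stack_pointer (hypothetical)
  uint32_t StackSize = 36;  // fixed-size locals (hypothetical)
  uint32_t Alignment = 32;  // MFI.getMaxAlignment() (hypothetical)
  assert((Alignment & (Alignment - 1)) == 0 && "Alignment must be a power of 2");

  uint32_t BP = SP;          // COPY into the base-pointer vreg
  SP -= StackSize;           // i32.sub: make room for fixed-size locals
  SP &= ~(Alignment - 1);    // i32.and with the CONST_I32 bitmask
  std::printf("BP=%#x SP=%#x\n", BP, SP);  // prints BP=0x1000 SP=0xfc0
}

Because Alignment is a power of two, the bitmask clears the low bits, so the realigned SP is the largest multiple of Alignment at or below the top of the locals, and the saved BP lets the epilogue restore __stack_pointer exactly.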