Index: lib/Target/ARM/ARMBaseRegisterInfo.cpp =================================================================== --- lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -385,16 +385,12 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const ARMFrameLowering *TFI = getFrameLowering(MF); // We can't realign the stack if: // 1. Dynamic stack realignment is explicitly disabled, - // 2. This is a Thumb1 function (it's not useful, so we don't bother), or - // 3. There are VLAs in the function and the base pointer is disabled. + // 2. There are VLAs in the function and the base pointer is disabled. if (!TargetRegisterInfo::canRealignStack(MF)) return false; - if (AFI->isThumb1OnlyFunction()) - return false; // Stack realignment requires a frame pointer. If we already started // register allocation with frame pointer elimination, it is too late now. if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget<ARMSubtarget>()))) Index: lib/Target/ARM/Thumb1FrameLowering.cpp =================================================================== --- lib/Target/ARM/Thumb1FrameLowering.cpp +++ lib/Target/ARM/Thumb1FrameLowering.cpp @@ -352,10 +352,36 @@ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); - // Thumb1 does not currently support dynamic stack realignment. Report a - // fatal error rather then silently generate bad code. 
- if (RegInfo->needsStackRealignment(MF)) - report_fatal_error("Dynamic stack realignment not supported for thumb1."); + if (RegInfo->needsStackRealignment(MF)) { + const unsigned NrBitsToZero = countTrailingZeros(MFI.getMaxAlignment()); + // Emit the following sequence, using R4 as a temporary, since we cannot use + // SP as a source or destination register for the shifts: + // mov r4, sp + // lsrs r4, r4, #NrBitsToZero + // lsls r4, r4, #NrBitsToZero + // mov sp, r4 + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) + .addReg(ARM::SP, RegState::Kill) + .add(predOps(ARMCC::AL)); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4) + .addDef(ARM::CPSR) + .addReg(ARM::R4, RegState::Kill) + .addImm(NrBitsToZero) + .add(predOps(ARMCC::AL)); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4) + .addDef(ARM::CPSR) + .addReg(ARM::R4, RegState::Kill) + .addImm(NrBitsToZero) + .add(predOps(ARMCC::AL)); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(ARM::R4, RegState::Kill) + .add(predOps(ARMCC::AL)); + + AFI->setShouldRestoreSPFromFP(true); + } // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects Index: test/CodeGen/ARM/thumb1_return_sequence.ll =================================================================== --- test/CodeGen/ARM/thumb1_return_sequence.ll +++ test/CodeGen/ARM/thumb1_return_sequence.ll @@ -9,6 +9,8 @@ ; -------- ; CHECK-V4T: push {[[SAVED:(r[4567](, )?)+]], lr} ; CHECK-V4T: sub sp, +; Stack is realigned because of the <6 x i32> type +; CHECK-V4T: mov sp, r4 ; CHECK-V5T: push {[[SAVED:(r[4567](, )?)+]], lr} %b = alloca <6 x i32>, align 16 @@ -21,7 +23,8 @@ ; Epilogue ; -------- -; CHECK-V4T: add sp, +; Stack realignment means sp is restored from frame pointer +; CHECK-V4T: mov sp ; CHECK-V4T-NEXT: pop {[[SAVED]]} ; The ISA for v4 does not support pop pc, so make sure we do not emit ; one even when we do not need to update SP. 
@@ -70,8 +73,9 @@ ; CHECK-V4T-NEXT: mov lr, [[POP_REG]] ; CHECK-V4T-NEXT: mov [[POP_REG]], r12 ; CHECK-V4T: bx lr -; CHECK-V5T: add sp, -; CHECK-V5T-NEXT: pop {[[SAVED]]} +; CHECK-V5T: lsls r4 +; CHECK-V5T-NEXT: mov sp, r4 +; CHECK-V5T: pop {[[SAVED]]} ; CHECK-V5T-NEXT: mov r12, [[POP_REG:r[0-7]]] ; CHECK-V5T-NEXT: pop {[[POP_REG]]} ; CHECK-V5T-NEXT: add sp, Index: test/CodeGen/Thumb/large-stack.ll =================================================================== --- test/CodeGen/Thumb/large-stack.ll +++ test/CodeGen/Thumb/large-stack.ll @@ -75,7 +75,7 @@ ; CHECK: add sp, [[TEMP3]] %retval = alloca i32, align 4 %tmp = alloca i32, align 4 - %a = alloca [805306369 x i8], align 16 + %a = alloca [805306369 x i8], align 4 store i32 0, i32* %tmp %tmp1 = load i32, i32* %tmp ret i32 %tmp1 @@ -91,7 +91,7 @@ ; CHECK: mov sp, r4 %retval = alloca i32, align 4 %tmp = alloca i32, align 4 - %a = alloca [805306369 x i8], align 16 + %a = alloca [805306369 x i8], align 8 store i32 0, i32* %tmp %tmp1 = load i32, i32* %tmp ret i32 %tmp1 Index: test/CodeGen/Thumb/long.ll =================================================================== --- test/CodeGen/Thumb/long.ll +++ test/CodeGen/Thumb/long.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | FileCheck %s +; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | \ +; RUN: FileCheck %s -check-prefix CHECK --check-prefix CHECK-EABI ; RUN: llc -mtriple=thumb-apple-darwin %s -verify-machineinstrs -o - | \ ; RUN: FileCheck %s -check-prefix CHECK -check-prefix CHECK-DARWIN @@ -172,10 +173,12 @@ %retval = load i64, i64* %a ; [#uses=1] ret i64 %retval ; CHECK-LABEL: f10: -; CHECK: sub sp, #8 +; CHECK-EABI: sub sp, #8 +; CHECK-DARWIN: add r7, sp, #0 ; CHECK: ldr r0, [sp] ; CHECK: ldr r1, [sp, #4] -; CHECK: add sp, #8 +; CHECK-EABI: add sp, #8 +; CHECK-DARWIN: mov sp, r7 } define i64 @f11(i64 %x, i64 %y) { Index: test/CodeGen/Thumb/stack-align.ll 
=================================================================== --- /dev/null +++ test/CodeGen/Thumb/stack-align.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=thumb-eabi < %s -o - | FileCheck %s + +define void @f() local_unnamed_addr #0 { +entry: +; Check stack is realigned to 16 byte boundary +; CHECK: mov r4, sp + %i = alloca i32, align 16 +; CHECK-NEXT: lsrs r4, r4, #4 +; CHECK-NEXT: lsls r4, r4, #4 +; CHECK-NEXT: mov sp, r4 + store i32 0, i32* %i, align 16 + call void @g(i32* nonnull %i) + ret void +; Check stack is restored from frame pointer +; CHECK: mov sp, r7 +} + +declare void @g(i32*) local_unnamed_addr #2