Index: lib/Target/ARM/ARMFrameLowering.cpp =================================================================== --- lib/Target/ARM/ARMFrameLowering.cpp +++ lib/Target/ARM/ARMFrameLowering.cpp @@ -892,7 +892,6 @@ bool isFixed = MFI.isFixedObjectIndex(FI); FrameReg = ARM::SP; - Offset += SPAdj; // SP can move around if there are allocas. We may also lose track of SP // when emergency spilling inside a non-reserved call frame setup. @@ -909,10 +908,13 @@ assert(RegInfo->hasBasePointer(MF) && "VLAs and dynamic stack alignment, but missing base pointer!"); FrameReg = RegInfo->getBaseRegister(); - } + } else + Offset += SPAdj; return Offset; } + Offset += SPAdj; + // If there is a frame pointer, use it when we can. if (hasFP(MF) && AFI->hasStackFrame()) { // Use frame pointer to reference fixed objects. Use it for locals if @@ -952,8 +954,14 @@ } } // Use the base pointer if we have one. - if (RegInfo->hasBasePointer(MF)) + // The only time this check can be true is when we have a Thumb function with + // VLAs and no stack realignment. But this means that the call frame pseudos + // are eliminated during frame info calculation, which in turn means that + // there is no need to keep track of the SP. So, SPAdj will be 0. + if (RegInfo->hasBasePointer(MF)) { + assert(SPAdj == 0 && "Using base pointer with non-zero SP adjustment"); FrameReg = RegInfo->getBaseRegister(); + } return Offset; } Index: test/CodeGen/ARM/alloca-align.ll =================================================================== --- test/CodeGen/ARM/alloca-align.ll +++ test/CodeGen/ARM/alloca-align.ll @@ -12,8 +12,7 @@ ; And a base pointer getting used. 
; CHECK: mov r6, sp ; Which is passed to the call -; CHECK: add [[REG:r[0-9]+|lr]], r6, #19456 -; CHECK: add r0, [[REG]], #536 +; CHECK: mov r0, r6 ; CHECK: bl bar define void @foo([20000 x i8]* %addr) { %tmp = alloca [4 x i32], align 32 Index: test/CodeGen/ARM/stack_align_with_base_register.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/stack_align_with_base_register.ll @@ -0,0 +1,59 @@ +; RUN: llc -mtriple=thumbv7-linux-gnueabi -mcpu=cortex-a9 < %s | FileCheck %s --check-prefix=CHECK-THUMB +; RUN: llc -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 < %s | FileCheck %s --check-prefix=CHECK-ARM +; Check that we don't adjust arguments addressed via base pointer by the amount +; used to adjust the stack pointer when passing arguments on the stack, where a +; call frame is not reserved in the prologue and the stack needs to be +; realigned. +%struct.B = type { i32, [12 x i8] } +%struct.A = type { [2000 x i32] } + +@_ZZ4funcvE12small_struct = private unnamed_addr constant %struct.B { i32 2, [12 x i8] undef }, align 16 + +; CHECK-THUMB: add.w r0, r6, #12 +; CHECK-THUMB: bl _Z3barRi1A +; CHECK-THUMB: ldr r0, [r6, #12] +; CHECK-ARM: add r0, r6, #12 +; CHECK-ARM: bl _Z3barRi1A +; CHECK-ARM: ldr r0, [r6, #12] +; Function Attrs: noinline +define dso_local i32 @_Z4funcv() local_unnamed_addr #0 { +entry: + %big_struct = alloca %struct.A, align 4 + %small_struct = alloca %struct.B, align 16 + %val = alloca i32, align 4 + %0 = bitcast %struct.A* %big_struct to i8* + %1 = bitcast %struct.B* %small_struct to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull align 16 %1, i8* align 16 bitcast (%struct.B* @_ZZ4funcvE12small_struct to i8*), i32 16, i1 false) + %2 = bitcast i32* %val to i8* + store i32 1, i32* %val, align 4, !tbaa !3 + call void @_Z4initR1A(%struct.A* nonnull dereferenceable(8000) %big_struct) + call void @_Z4initR1B(%struct.B* nonnull dereferenceable(16) %small_struct) + call void @_Z3barRi1A(i32* nonnull 
dereferenceable(4) %val, %struct.A* byval nonnull align 4 %big_struct) + %3 = load i32, i32* %val, align 4, !tbaa !3 + ret i32 %3 +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1 + +declare dso_local void @_Z4initR1A(%struct.A* dereferenceable(8000)) local_unnamed_addr #2 + +declare dso_local void @_Z4initR1B(%struct.B* dereferenceable(16)) local_unnamed_addr #2 + +declare dso_local void @_Z3barRi1A(i32* dereferenceable(4), %struct.A* byval align 4) local_unnamed_addr #2 + +attributes #0 = { noinline "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+armv7-a,+dsp,+neon,+vfp3,-crc,-crypto,-d16,-dotprod,-fp-armv8,-fp-only-sp,-fp16,-hwdiv,-hwdiv-arm,-ras,-vfp4" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+armv7-a,+dsp,+neon,+vfp3,-crc,-crypto,-d16,-dotprod,-fp-armv8,-fp-only-sp,-fp16,-hwdiv,-hwdiv-arm,-ras,-vfp4" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"min_enum_size", i32 4} +!2 = !{!"clang version 7.0.0"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = 
!{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C++ TBAA"}