diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -169,8 +169,8 @@ findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); // Find an instruction that updates the base register of the ld/st - // instruction. - bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, + // instruction. Return update Offset or 0 if not found. + int isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, unsigned BaseReg, int Offset); // Merge a pre- or post-index base register update into a ld/st instruction. @@ -1698,7 +1698,7 @@ return NextI; } -bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, +int AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI, unsigned BaseReg, int Offset) { switch (MI.getOpcode()) { @@ -1739,10 +1739,10 @@ // If we have a non-zero Offset, we check that it matches the amount // we're adding to the register. if (!Offset || Offset == UpdateOffset) - return true; + return UpdateOffset; break; } - return false; + return 0; } MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( @@ -1780,6 +1780,12 @@ ModifiedRegUnits.clear(); UsedRegUnits.clear(); ++MBBI; + + // We can't post-increment the stack pointer if any instruction between + // the memory access (I) and the increment (MBBI) can access the memory + // region defined by [SP, MBBI]. + const bool BaseRegSP = BaseReg == AArch64::SP; + bool MayAliasFP = false; for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { MachineInstr &MI = *MBBI; @@ -1789,8 +1795,13 @@ ++Count; // If we found a match, return it. 
-    if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
+    const int Offset = isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset);
+    if (Offset) {
+      // Prevent premature stack-popping
+      if (MayAliasFP && Offset > 0)
+        continue;
       return MBBI;
+    }
 
     // Update the status of what the instruction clobbered and used.
     LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
@@ -1800,6 +1811,30 @@
     if (!ModifiedRegUnits.available(BaseReg) ||
         !UsedRegUnits.available(BaseReg))
       return E;
+
+    // FIXME: For now we conservatively detect any instruction that might
+    // access the stack. We should validate that the access is in the memory
+    // region of the two memory operands (e.g. [SP, MBBI]), but that requires
+    // a new may-alias API. May also need to track if other registers might
+    // point to the stack.
+    if (!MayAliasFP && BaseRegSP && MBBI->mayLoadOrStore()) {
+      if (MBBI->memoperands_empty()){
+        MayAliasFP = true;
+        continue;
+      }
+      MachineFrameInfo *MFI = &(I->getMF()->getFrameInfo());
+      assert(MFI);
+      for (MachineMemOperand *MMO : MBBI->memoperands()) {
+        const PseudoSourceValue *PSVa = MMO->getPseudoValue();
+        if (!PSVa){
+          MayAliasFP = true;
+          break;
+        }
+        if (PSVa->mayAlias(MFI)){
+          MayAliasFP = true;
+          break;
+        }
+      }
+    }
   }
   return E;
 }
@@ -1844,7 +1879,7 @@
     ++Count;
 
     // If we found a match, return it.
-    if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset))
+    if (isMatchingUpdateInsn(*I, MI, BaseReg, Offset) != 0)
       return MBBI;
 
     // Update the status of what the instruction clobbered and used.
diff --git a/llvm/test/CodeGen/AArch64/aarch64-ldst-no-premature-sp-pop.mir b/llvm/test/CodeGen/AArch64/aarch64-ldst-no-premature-sp-pop.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-ldst-no-premature-sp-pop.mir
@@ -0,0 +1,85 @@
+# RUN: llc -start-before=aarch64-ldst-opt -o - %s | FileCheck %s
+# CHECK-NOT: stp xzr, xzr, [sp], #16
+# CHECK: add sp, sp, #16
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-arm-none-eabi"
+
+  define hidden i32 @foo(i32 %0) {
+    %2 = alloca [4 x i32], align 4
+    %3 = bitcast [4 x i32]* %2 to i8*
+    call void @llvm.memset.p0i8.i64(i8* nonnull align 4 dereferenceable(16) %3, i8 0, i64 16, i1 false)
+    %4 = sext i32 %0 to i64
+    %5 = getelementptr inbounds [4 x i32], [4 x i32]* %2, i64 0, i64 %4
+    %6 = load i32, i32* %5, align 4
+    ret i32 %6
+  }
+
+  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
+  declare void @llvm.stackprotector(i8*, i8**) #3
+
+  !llvm.module.flags = !{!0}
+  !llvm.ident = !{!1}
+
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{!"clang version 11.0.0 "}
+  !2 = !{!3, !3, i64 0}
+  !3 = !{!"int", !4, i64 0}
+  !4 = !{!"omnipotent char", !5, i64 0}
+  !5 = !{!"Simple C++ TBAA"}
+
+...
+--- +name: foo +alignment: 8 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 16 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 16 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, type: default, offset: -16, size: 16, + alignment: 8, stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -16, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.1): + liveins: $w0 + + $sp = frame-setup SUBXri $sp, 16, 0 + $x8 = ADDXri $sp, 0, 0 + STRXui $xzr, $sp, 1 :: (store 8 into %ir.3 + 8) + STRXui $xzr, $sp, 0 :: (store 8 into %ir.3) + renamable $w0 = LDRWroW killed renamable $x8, killed renamable $w0, 1, 1 :: (load 4 from %ir.5, !tbaa !2) + $sp = frame-destroy ADDXri $sp, 16, 0 + RET undef $lr, implicit $w0 + +... 
diff --git a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll --- a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll +++ b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll @@ -1,31 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64-apple-ios | FileCheck %s -; CHECK-LABEL: _test: -; CHECK-DAG: fmov.2d v0, #2.00000000 -; CHECK-DAG: and [[MASK_IDX:x[0-9]+]], x1, #0x3 -; CHECK-DAG: mov x9, sp -; CHECK-DAG: str q0, [sp], #16 -; CHECK-DAG: bfi [[PTR:x[0-9]+]], [[MASK_IDX]], #2, #2 -; CHECK: ldr s0, {{\[}}[[PTR]]{{\]}} -; CHECK: str s0, [x0] - define void @test(float * %p1, i32 %v1) { +; CHECK-LABEL: test: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #16 ; =16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: fmov.2d v0, #2.00000000 +; CHECK-NEXT: and x8, x1, #0x3 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: bfi x9, x8, #2, #2 +; CHECK-NEXT: ldr s0, [x9] +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: add sp, sp, #16 ; =16 +; CHECK-NEXT: ret entry: %v2 = extractelement <3 x float> , i32 %v1 store float %v2, float* %p1, align 4 ret void } -; CHECK-LABEL: _test2 -; CHECK: movi.16b v0, #63 -; CHECK-DAG: and [[MASK_IDX:x[0-9]+]], x1, #0x3 -; CHECK-DAG: str q0, [sp], #16 -; CHECK-DAG: mov x9, sp -; CHECK-DAG: bfi [[PTR:x[0-9]+]], [[MASK_IDX]], #2, #2 -; CHECK: ldr s0, {{\[}}[[PTR]]{{\]}} -; CHECK: str s0, [x0] - define void @test2(float * %p1, i32 %v1) { +; CHECK-LABEL: test2: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #16 ; =16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: movi.16b v0, #63 +; CHECK-NEXT: and x8, x1, #0x3 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: bfi x9, x8, #2, #2 +; CHECK-NEXT: ldr s0, [x9] +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: add sp, sp, #16 ; =16 +; CHECK-NEXT: ret entry: %v2 = 
extractelement <3 x float> , i32 %v1 store float %v2, float* %p1, align 4