Index: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp =================================================================== --- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1470,6 +1470,8 @@ bool IsUnscaled = TII->isUnscaledLdSt(MI); int Offset = getLdStOffsetOp(MI).getImm(); int OffsetStride = IsUnscaled ? getMemScale(MI) : 1; + if (Offset > 0) + Offset -= OffsetStride; if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) return false; Index: test/CodeGen/AArch64/ldst-opt.ll =================================================================== --- test/CodeGen/AArch64/ldst-opt.ll +++ test/CodeGen/AArch64/ldst-opt.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK -check-prefix=CHECKNOCOMBINE %s +; RUN: llc -mtriple=aarch64-linux-gnu -O1 --combiner-alias-analysis=true -aarch64-enable-atomic-cfg-tidy=0 -disable-lsr -verify-machineinstrs -o - %s | FileCheck -check-prefix=CHECK -check-prefix=CHECKCOMBINE %s ; This file contains tests for the AArch64 load/store optimizer. @@ -1410,10 +1411,14 @@ ; err on the side that allows for stp q instruction generation. define void @merge_zr32_3(i32* %p) { ; CHECK-LABEL: merge_zr32_3: -; CHECK: // %entry -; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 -; CHECK-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}] -; CHECK-NEXT: ret +; CHECKNOCOMBINE: // %entry +; CHECKNOCOMBINE-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 +; CHECKNOCOMBINE-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}] +; CHECKNOCOMBINE-NEXT: ret +; CHECKCOMBINE: // %entry +; CHECKCOMBINE-NEXT: stp xzr, xzr, [x0] +; CHECKCOMBINE-NEXT: stp xzr, xzr, [x0, #16] +; CHECKCOMBINE-NEXT: ret entry: store i32 0, i32* %p %p1 = getelementptr i32, i32* %p, i32 1 @@ -1506,10 +1511,14 @@ ; vector store since the zero constant vector has multiple uses. define void @merge_zr64_2(i64* %p) { ; CHECK-LABEL: merge_zr64_2: -; CHECK: // %entry -; CHECK-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 -; CHECK-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}] -; CHECK-NEXT: ret +; CHECKNOCOMBINE: // %entry +; CHECKNOCOMBINE-NEXT: movi v[[REG:[0-9]]].2d, #0000000000000000 +; CHECKNOCOMBINE-NEXT: stp q[[REG]], q[[REG]], [x{{[0-9]+}}] +; CHECKNOCOMBINE-NEXT: ret +; CHECKCOMBINE: // %entry +; CHECKCOMBINE-NEXT: stp xzr, xzr, [x0] +; CHECKCOMBINE-NEXT: stp xzr, xzr, [x0, #16] +; CHECKCOMBINE-NEXT: ret entry: store i64 0, i64* %p %p1 = getelementptr i64, i64* %p, i64 1