diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -734,8 +734,11 @@
     NextI = next_nodbg(NextI, E);

   unsigned Opc = I->getOpcode();
+  unsigned MergeMIOpc = MergeMI->getOpcode();
   bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
-  int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);
+  bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
+  int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
+  int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;

   bool MergeForward = Flags.getMergeForward();
   // Insert our new paired instruction after whichever of the paired
@@ -749,15 +752,20 @@
   // Which register is Rt and which is Rt2 depends on the offset order.
   MachineInstr *RtMI;
-  if (AArch64InstrInfo::getLdStOffsetOp(*I).getImm() ==
-      AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
+  int64_t ScaledIAddress =
+      AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
+  int64_t ScaledMergeMIAddress =
+      AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() *
+      MergeMIOffsetStride;
+  if (ScaledIAddress == ScaledMergeMIAddress + OffsetStride)
     RtMI = &*MergeMI;
   else
     RtMI = &*I;

   int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
+  bool IsScaledRtMI = !TII->hasUnscaledLdStOffset(RtMI->getOpcode());
   // Change the scaled offset from small to large type.
-  if (IsScaled) {
+  if (IsScaledRtMI) {
     assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
     OffsetImm /= 2;
   }

diff --git a/llvm/test/CodeGen/AArch64/str-narrow-zero-merge.ll b/llvm/test/CodeGen/AArch64/str-narrow-zero-merge.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/str-narrow-zero-merge.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+%struct.d = type { i16, i16, i16 }
+
+define void @merge_unscaled_str_with_scaled() {
+; CHECK-LABEL: merge_unscaled_str_with_scaled:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    str xzr, [sp, #64]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+entry:
+  %ptr.1 = alloca i32, align 4
+  %ptr.2 = alloca %struct.d, align 2
+  store i32 0, ptr %ptr.1, align 4
+  %0 = getelementptr inbounds i8, ptr %ptr.2, i64 2
+  store i32 0, ptr %0, align 2
+  ret void
+}
+
+define void @merge_scaled_str_with_scaled_str() {
+; CHECK-LABEL: merge_scaled_str_with_scaled_str:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    str xzr, [sp, #8]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+  %ptr.1 = alloca i32, align 4
+  %ptr.2 = alloca i32, align 4
+  store i32 0, ptr %ptr.1, align 4
+  store i32 0, ptr %ptr.2, align 4
+  ret void
+}
+
+define void @merge_scaled_str_with_scaled_str_2() {
+; CHECK-LABEL: merge_scaled_str_with_scaled_str_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    str wzr, [sp, #12]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+entry:
+  %g = alloca i16, align 2
+  %h = alloca i16, align 2
+  store i16 0, ptr %g, align 2
+  store i16 0, ptr %h, align 2
+  ret void
+}
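
Note (not part of the patch): the change above normalizes scaled and unscaled
immediates to byte addresses before comparing them. Scaled AArch64 store forms
such as STRWui encode their immediate in multiples of the access size, while
unscaled forms such as STURWi encode it in bytes, so comparing the raw
immediates of a mixed pair compares different units. Below is a minimal
standalone sketch of that normalization; the Store struct, the byteOffset
helper, and the concrete immediates are hypothetical stand-ins for the
MachineInstr / TargetInstrInfo queries used in the pass, not LLVM API.

#include <cassert>
#include <cstdint>

// Hypothetical model of a store's addressing-mode fields; in the pass these
// come from AArch64InstrInfo::getLdStOffsetOp(), TII->getMemScale() and
// TII->hasUnscaledLdStOffset().
struct Store {
  int64_t Imm;   // encoded immediate
  int MemScale;  // access size in bytes
  bool IsScaled; // scaled (STRWui-style) vs. unscaled (STURWi-style)
};

// Mirrors the patch: a scaled immediate is multiplied by the access size,
// while an unscaled immediate already is a byte offset (stride 1).
int64_t byteOffset(const Store &S) {
  return S.Imm * (S.IsScaled ? S.MemScale : 1);
}

int main() {
  Store Scaled = {3, 4, true};    // e.g. str wzr, [sp, #12]: imm = 12 / 4
  Store Unscaled = {8, 4, false}; // e.g. stur wzr, [sp, #8]: imm = 8

  // The raw immediates (3 vs. 8) are not adjacent in either order, but the
  // byte addresses (12 vs. 8) differ by exactly one 4-byte access, which is
  // what the rewritten comparison in mergeNarrowZeroStores detects.
  assert(byteOffset(Scaled) == byteOffset(Unscaled) + 4);
  return 0;
}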