diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -734,8 +734,11 @@ NextI = next_nodbg(NextI, E); unsigned Opc = I->getOpcode(); + unsigned MergeMIOpc = MergeMI->getOpcode(); bool IsScaled = !TII->hasUnscaledLdStOffset(Opc); - int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I); + bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc); + int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1; + int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1; bool MergeForward = Flags.getMergeForward(); // Insert our new paired instruction after whichever of the paired @@ -749,17 +752,32 @@ // Which register is Rt and which is Rt2 depends on the offset order. MachineInstr *RtMI; - if (AArch64InstrInfo::getLdStOffsetOp(*I).getImm() == - AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() + OffsetStride) + int64_t IOffsetInBytes = + AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride; + int64_t MIOffsetInBytes = + AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() * + MergeMIOffsetStride; + if (IOffsetInBytes > MIOffsetInBytes) RtMI = &*MergeMI; else RtMI = &*I; int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm(); - // Change the scaled offset from small to large type. - if (IsScaled) { - assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge"); - OffsetImm /= 2; + bool IsScaledRtMI = !TII->hasUnscaledLdStOffset(RtMI->getOpcode()); + int OffsetStrideRtMI = IsScaledRtMI ? TII->getMemScale(*RtMI) : 1; + // Normalise final offset to be expressed in bytes. + if (IsScaledRtMI) { + OffsetImm *= OffsetStrideRtMI; + } + + // Adjust final offset if the result opcode is a scaled store. 
+  int NewOpcode = getMatchingWideOpcode(Opc);
+  bool FinalIsScaled = !TII->hasUnscaledLdStOffset(NewOpcode);
+  if (FinalIsScaled) {
+    int NewOffsetStride = TII->getMemScale(NewOpcode);
+    assert(((OffsetImm % NewOffsetStride) == 0) &&
+           "Offset should be a multiple of the store memory scale");
+    OffsetImm = OffsetImm / NewOffsetStride;
   }
 
   // Construct the new instruction.
diff --git a/llvm/test/CodeGen/AArch64/str-narrow-zero-merge.mir b/llvm/test/CodeGen/AArch64/str-narrow-zero-merge.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/str-narrow-zero-merge.mir
@@ -0,0 +1,172 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -run-pass=aarch64-ldst-opt %s -o - | FileCheck %s
+
+
+---
+name: merge_unscaled_str_with_unscaled_str_8
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: merge_unscaled_str_with_unscaled_str_8
+    ; CHECK: STURHHi $wzr, $x0, 4 :: (store (s8))
+    ; CHECK-NEXT: RET undef $lr
+    STURBBi $wzr, $x0, 4 :: (store (s8))
+    STURBBi $wzr, $x0, 5 :: (store (s8))
+    RET undef $lr
+...
+---
+name: merge_scaled_str_with_scaled_str_8
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_scaled_str_with_scaled_str_8
+    ; CHECK: STRHHui $wzr, $x0, 2 :: (store (s8))
+    ; CHECK-NEXT: RET undef $lr
+    STRBBui $wzr, $x0, 4 :: (store (s8))
+    STRBBui $wzr, $x0, 5 :: (store (s8))
+    RET undef $lr
+...
+---
+name: merge_scaled_str_with_unscaled_8
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_8
+    ; CHECK: STRBBui $wzr, $x0, 4 :: (store (s8))
+    ; CHECK-NEXT: STURBBi $wzr, $x0, 5 :: (store (s8))
+    ; CHECK-NEXT: RET undef $lr
+    STRBBui $wzr, $x0, 4 :: (store (s8))
+    STURBBi $wzr, $x0, 5 :: (store (s8))
+    RET undef $lr
+...
+--- +name: merge_unscaled_str_with_scaled_8 +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_unscaled_str_with_scaled_8 + ; CHECK: STURBBi $wzr, $x0, 4 :: (store (s8)) + ; CHECK-NEXT: STRBBui $wzr, $x0, 5 :: (store (s8)) + ; CHECK-NEXT: RET undef $lr + STURBBi $wzr, $x0, 4 :: (store (s8)) + STRBBui $wzr, $x0, 5 :: (store (s8)) + RET undef $lr +... +--- +name: merge_unscaled_str_with_unscaled_str_16 +body: | + bb.0: + ; CHECK-LABEL: name: merge_unscaled_str_with_unscaled_str_16 + ; CHECK: STURWi $wzr, $x0, 4 :: (store (s16)) + ; CHECK-NEXT: RET undef $lr + STURHHi $wzr, $x0, 4 :: (store (s16)) + STURHHi $wzr, $x0, 6 :: (store (s16)) + RET undef $lr +... +--- +name: merge_scaled_str_with_scaled_str_16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_scaled_str_with_scaled_str_16 + ; CHECK: STRWui $wzr, $x0, 2 :: (store (s16)) + ; CHECK-NEXT: RET undef $lr + STRHHui $wzr, $x0, 4 :: (store (s16)) + STRHHui $wzr, $x0, 5 :: (store (s16)) + RET undef $lr +... +--- +name: merge_scaled_str_with_unscaled_16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_16 + ; CHECK: STRHHui $wzr, $x0, 2 :: (store (s16)) + ; CHECK-NEXT: STURHHi $wzr, $x0, 6 :: (store (s16)) + ; CHECK-NEXT: RET undef $lr + STRHHui $wzr, $x0, 2 :: (store (s16)) + STURHHi $wzr, $x0, 6 :: (store (s16)) + RET undef $lr +... +--- +name: merge_unscaled_str_with_scaled_16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_unscaled_str_with_scaled_16 + ; CHECK: STURHHi $wzr, $x0, 4 :: (store (s16)) + ; CHECK-NEXT: STRHHui $wzr, $x0, 3 :: (store (s16)) + ; CHECK-NEXT: RET undef $lr + STURHHi $wzr, $x0, 4 :: (store (s16)) + STRHHui $wzr, $x0, 3 :: (store (s16)) + RET undef $lr +... 
+--- +name: merge_unscaled_str_with_unscaled_32 +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_unscaled_str_with_unscaled_32 + ; CHECK: STURXi $xzr, $x0, 4 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr + STURWi $wzr, $x0, 4 :: (store (s32)) + STURWi $wzr, $x0, 8 :: (store (s32)) + RET undef $lr +... +--- +name: merge_scaled_str_with_scaled_32 +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_scaled_str_with_scaled_32 + ; CHECK: STRXui $xzr, $x0, 1 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr + STRWui $wzr, $x0, 2 :: (store (s32)) + STRWui $wzr, $x0, 3 :: (store (s32)) + RET undef $lr +... +--- +name: merge_scaled_str_with_unscaled_32 +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_32 + ; CHECK: STRXui $xzr, $x0, 1 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr + STRWui $wzr, $x0, 2 :: (store (s32)) + STURWi $wzr, $x0, 12 :: (store (s32)) + RET undef $lr +... +--- +name: merge_unscaled_str_with_scaled_32 +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_unscaled_str_with_scaled_32 + ; CHECK: STURXi $xzr, $x0, 8 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr + STURWi $wzr, $x0, 8 :: (store (s32)) + STRWui $wzr, $x0, 3 :: (store (s32)) + RET undef $lr +... +--- +name: merge_scaled_str_with_unscaled_32_lower_address_second +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_32_lower_address_second + ; CHECK: STRXui $xzr, $x0, 1 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr + STRWui $wzr, $x0, 3 :: (store (s32)) + STURWi $wzr, $x0, 8 :: (store (s32)) + RET undef $lr +... +--- +name: merge_unscaled_str_with_scaled_32_lower_address_second +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_unscaled_str_with_scaled_32_lower_address_second + ; CHECK: STURXi $xzr, $x0, 4 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr + STURWi $wzr, $x0, 8 :: (store (s32)) + STRWui $wzr, $x0, 1 :: (store (s32)) + RET undef $lr +... 
+--- +name: merge_scaled_str_with_unscaled_32_negative_address +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_32_negative_address + ; CHECK: STPWi $wzr, $wzr, $x0, -1 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr + STRWui $wzr, $x0, 0 :: (store (s32)) + STURWi $wzr, $x0, -4 :: (store (s32)) + RET undef $lr