Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2228,6 +2228,7 @@ case AArch64::LDRWpre: case AArch64::LDURXi: case AArch64::LDRXpre: + case AArch64::LDRSWpre: case AArch64::LDURSWi: case AArch64::LDURHHi: case AArch64::LDURBBi: @@ -2437,6 +2438,7 @@ case AArch64::LDURXi: case AArch64::LDRXpre: case AArch64::LDURSWi: + case AArch64::LDRSWpre: return true; } } @@ -2557,7 +2559,8 @@ // Can't merge/pair if the instruction modifies the base register. // e.g., ldr x0, [x0] // This case will never occur with an FI base. - // However, if the instruction is an LDR/STRpre, it can be merged. + // However, if the instruction is an LDRpre or + // STRpre, it can be merged. // For example: // ldr q0, [x11, #32]! // ldr q1, [x11, #16] @@ -3134,6 +3137,7 @@ case AArch64::LDRSpre: case AArch64::LDRSWui: case AArch64::LDURSWi: + case AArch64::LDRSWpre: case AArch64::LDRWpre: case AArch64::LDRWui: case AArch64::LDURWi: @@ -3189,6 +3193,7 @@ return false; case AArch64::LDRWpre: case AArch64::LDRXpre: + case AArch64::LDRSWpre: case AArch64::LDRSpre: case AArch64::LDRDpre: case AArch64::LDRQpre: Index: llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -293,6 +293,8 @@ return AArch64::LDRWui; case AArch64::LDURSWi: return AArch64::LDURWi; + case AArch64::LDRSWpre: + return AArch64::LDRWpre; } } @@ -372,6 +374,8 @@ case AArch64::LDRSWui: case AArch64::LDURSWi: return AArch64::LDPSWi; + case AArch64::LDRSWpre: + return AArch64::LDPSWpre; } } @@ -585,6 +589,8 @@ return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi); case AArch64::LDRXpre: return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi); + case AArch64::LDRSWpre: + return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi); } } @@ -1340,7 +1346,7 @@ return false; // The STRpre - STRui and - // LDRpre-LDRui + // LDRpre-LDRui // are candidate pairs that can be merged. if (isPreLdStPairCandidate(FirstMI, MI)) return true; Index: llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir =================================================================== --- llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir +++ llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir @@ -588,7 +588,7 @@ --- -name: 21-ldrswpre-ldrswui-no-merge +name: 21-ldrswpre-ldrswui-merge tracksRegLiveness: true liveins: - { reg: '$x0' } @@ -599,10 +599,9 @@ body: | bb.0: liveins: $x0, $x1, $x2 - ; CHECK-LABEL: name: 21-ldrswpre-ldrswui-no-merge + ; CHECK-LABEL: name: 21-ldrswpre-ldrswui-merge ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1 :: (load (s32)) - ; CHECK: renamable $x2 = LDRSWui renamable $x1, 1 :: (load (s32)) + ; CHECK: early-clobber $x1, renamable $x0, renamable $x2 = LDPSWpre renamable $x1, 10 :: (load (s32)) ; CHECK: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) ; CHECK: RET undef $lr early-clobber renamable $x1, renamable $x0 = LDRSWpre killed renamable $x1, 40 :: (load (s32)) @@ -614,7 +613,7 @@ --- -name: 22-ldrswpre-ldurswi-no-merge +name: 22-ldrswpre-ldurswi-merge tracksRegLiveness: true liveins: - { reg: '$x0' } @@ -625,10 +624,9 @@ body: | bb.0: liveins: $x0, $x1, $x2 - ; CHECK-LABEL: name: 22-ldrswpre-ldurswi-no-merge + ; CHECK-LABEL: name: 22-ldrswpre-ldurswi-merge ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1 :: (load (s32)) - ; CHECK: renamable $x2 = LDURSWi renamable $x1, 4 :: (load (s32)) + ; CHECK: early-clobber $x1, renamable $x0, renamable $x2 = LDPSWpre renamable $x1, 10 :: (load (s32)) ; CHECK: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) ; CHECK: RET undef $lr early-clobber renamable $x1, renamable $x0 = LDRSWpre killed renamable $x1, 40 :: (load (s32))