Index: llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1515,6 +1515,37 @@
   return None;
 }
 
+/// Look for a register that can be used for renaming and, if one is found,
+/// record it in \p Flags and request a forward merge. Nothing is renamed
+/// here; the caller only learns whether \p Flags now carries a rename
+/// register. The search is skipped when the RegRenamingCounter debug counter
+/// says not to execute. \p MaybeCanRename memoizes the canRenameUpToDef()
+/// result and is taken by reference so the caller's copy is filled in once,
+/// exactly as the inline code this helper replaces did.
+static bool
+regHasBeenRenamed(Optional<bool> &MaybeCanRename, LdStPairFlags &Flags,
+                  MachineInstr &FirstMI, MachineInstr &MI,
+                  LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
+                  SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
+                  const TargetRegisterInfo *TRI) {
+  if (DebugCounter::shouldExecute(RegRenamingCounter)) {
+    if (!MaybeCanRename)
+      MaybeCanRename = {
+          canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
+
+    if (*MaybeCanRename) {
+      Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
+          FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses, TRI);
+      if (MaybeRenameReg) {
+        Flags.setRenameReg(*MaybeRenameReg);
+        Flags.setMergeForward(true);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 /// Scan the instructions looking for a load/store that can be combined with the
 /// current instruction into a wider equivalent or a load/store pair.
 MachineBasicBlock::iterator
@@ -1666,6 +1697,27 @@
             continue;
           }
         }
+        // If the load/store pattern has been optimized and reordered
+        // into the following:
+        //   ldr q0, [x1, #16]
+        //   str q0, [x0, #16]
+        //   ldr q0, [x1]
+        //   str q0, [x0]
+        // and the destination register of the load/store instruction is
+        // the same register as or a sub/super register of the other
+        // load/store, it will not generate an LDP/STP, so we attempt to
+        // rename the register so that it can be recognised as a pair.
+        // TODO: This is currently supported for STPs, LDPs are not
+        // being generated yet.
+        if (TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
+          bool FoundRenameReg =
+              regHasBeenRenamed(MaybeCanRename, Flags, FirstMI, MI, DefinedInBB,
+                                UsedInBetween, RequiredClasses, TRI);
+          if (FoundRenameReg) {
+            MBBIWithRenameReg = MBBI;
+            continue;
+          }
+        }
         // If the destination register of one load is the same register or a
         // sub/super register of the other load, bail and keep looking. A
         // load-pair instruction with both destination registers the same is
@@ -1714,22 +1766,11 @@
             Flags.clearRenameReg();
             return MBBI;
           }
-
-          if (DebugCounter::shouldExecute(RegRenamingCounter)) {
-            if (!MaybeCanRename)
-              MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
-                                                 RequiredClasses, TRI)};
-
-            if (*MaybeCanRename) {
-              Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
-                  FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
-                  TRI);
-              if (MaybeRenameReg) {
-                Flags.setRenameReg(*MaybeRenameReg);
-                Flags.setMergeForward(true);
-                MBBIWithRenameReg = MBBI;
-              }
-            }
+          bool FoundRenameReg =
+              regHasBeenRenamed(MaybeCanRename, Flags, FirstMI, MI, DefinedInBB,
+                                UsedInBetween, RequiredClasses, TRI);
+          if (FoundRenameReg) {
+            MBBIWithRenameReg = MBBI;
           }
         }
         // Unable to combine these instructions due to interference in between.
Index: llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll @@ -33,38 +33,30 @@ ; CHECK-NEXT: .cfi_offset w28, -16 ; CHECK-NEXT: .cfi_offset w30, -24 ; CHECK-NEXT: .cfi_offset w29, -32 -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x0, #16] -; CHECK-NEXT: str q0, [sp, #16] -; CHECK-NEXT: ldr q0, [x0, #32] -; CHECK-NEXT: str q0, [sp, #32] +; CHECK-NEXT: stp q1, q0, [sp] +; CHECK-NEXT: ldr q1, [x0, #32] ; CHECK-NEXT: ldr q0, [x0, #48] -; CHECK-NEXT: str q0, [sp, #48] -; CHECK-NEXT: ldr q0, [x0, #64] -; CHECK-NEXT: str q0, [sp, #64] +; CHECK-NEXT: stp q1, q0, [sp, #32] +; CHECK-NEXT: ldr q1, [x0, #64] ; CHECK-NEXT: ldr q0, [x0, #80] -; CHECK-NEXT: str q0, [sp, #80] -; CHECK-NEXT: ldr q0, [x0, #96] -; CHECK-NEXT: str q0, [sp, #96] +; CHECK-NEXT: stp q1, q0, [sp, #64] +; CHECK-NEXT: ldr q1, [x0, #96] ; CHECK-NEXT: ldr q0, [x0, #112] -; CHECK-NEXT: str q0, [sp, #112] -; CHECK-NEXT: ldr q0, [x0, #128] -; CHECK-NEXT: str q0, [sp, #128] +; CHECK-NEXT: stp q1, q0, [sp, #96] +; CHECK-NEXT: ldr q1, [x0, #128] ; CHECK-NEXT: ldr q0, [x0, #144] -; CHECK-NEXT: str q0, [sp, #144] -; CHECK-NEXT: ldr q0, [x0, #160] -; CHECK-NEXT: str q0, [sp, #160] +; CHECK-NEXT: stp q1, q0, [sp, #128] +; CHECK-NEXT: ldr q1, [x0, #160] ; CHECK-NEXT: ldr q0, [x0, #176] -; CHECK-NEXT: str q0, [sp, #176] -; CHECK-NEXT: ldr q0, [x0, #192] -; CHECK-NEXT: str q0, [sp, #192] +; CHECK-NEXT: stp q1, q0, [sp, #160] +; CHECK-NEXT: ldr q1, [x0, #192] ; CHECK-NEXT: ldr q0, [x0, #208] -; CHECK-NEXT: str q0, [sp, #208] -; CHECK-NEXT: ldr q0, [x0, #224] -; CHECK-NEXT: str q0, [sp, #224] +; CHECK-NEXT: stp q1, q0, [sp, #192] +; CHECK-NEXT: ldr q1, [x0, #224] ; CHECK-NEXT: ldr q0, [x0, #240] -; CHECK-NEXT: str q0, [sp, #240] +; CHECK-NEXT: stp q1, q0, [sp, #224] ; CHECK-NEXT: bl 
byval_a64i32 ; CHECK-NEXT: ldr x28, [sp, #272] // 8-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload Index: llvm/test/CodeGen/AArch64/consthoist-gep.ll =================================================================== --- llvm/test/CodeGen/AArch64/consthoist-gep.ll +++ llvm/test/CodeGen/AArch64/consthoist-gep.ll @@ -3,6 +3,8 @@ ; CHECK-NOT: adrp x10, global+332 ; CHECK-NOT: add x10, x10, :lo12:global+332 ; CHECK: adrp x10, global+528 +; CHECK-NEXT: and w12, w8, #0xffffff +; CHECK-NEXT: ldr w8, [x11] ; CHECK-NEXT: add x10, x10, :lo12:global+528 %struct.blam = type { %struct.bar, %struct.bar.0, %struct.wobble, %struct.wombat, i8, i16, %struct.snork.2, %struct.foo, %struct.snork.3, %struct.wobble.4, %struct.quux, [9 x i16], %struct.spam, %struct.zot } Index: llvm/test/CodeGen/AArch64/ldst-opt.ll =================================================================== --- llvm/test/CodeGen/AArch64/ldst-opt.ll +++ llvm/test/CodeGen/AArch64/ldst-opt.ll @@ -1117,7 +1117,7 @@ define void @post-indexed-sub-word(i32* %a, i32* %b, i64 %count) nounwind { ; CHECK-LABEL: post-indexed-sub-word ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #-8 -; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #-8 +; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #-4] br label %for.body for.body: %phi1 = phi i32* [ %gep4, %for.body ], [ %b, %0 ] @@ -1141,7 +1141,7 @@ define void @post-indexed-sub-doubleword(i64* %a, i64* %b, i64 %count) nounwind { ; CHECK-LABEL: post-indexed-sub-doubleword ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-16 -; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-16 +; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0, #-8] br label %for.body for.body: %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ] @@ -1165,7 +1165,7 @@ define void @post-indexed-sub-quadword(<2 x i64>* %a, <2 x i64>* %b, i64 %count) nounwind { ; CHECK-LABEL: post-indexed-sub-quadword ; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #-32 -; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #-32 +; CHECK: stp q{{[0-9]+}}, 
q{{[0-9]+}}, [x0, #-16] br label %for.body for.body: %phi1 = phi <2 x i64>* [ %gep4, %for.body ], [ %b, %0 ] @@ -1189,7 +1189,7 @@ define void @post-indexed-sub-float(float* %a, float* %b, i64 %count) nounwind { ; CHECK-LABEL: post-indexed-sub-float ; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #-8 -; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #-8 +; CHECK: stp s{{[0-9]+}}, s{{[0-9]+}}, [x0, #-4] br label %for.body for.body: %phi1 = phi float* [ %gep4, %for.body ], [ %b, %0 ] @@ -1213,7 +1213,7 @@ define void @post-indexed-sub-double(double* %a, double* %b, i64 %count) nounwind { ; CHECK-LABEL: post-indexed-sub-double ; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #-16 -; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #-16 +; CHECK: stp d{{[0-9]+}}, d{{[0-9]+}}, [x0, #-8] br label %for.body for.body: %phi1 = phi double* [ %gep4, %for.body ], [ %b, %0 ] @@ -1237,7 +1237,7 @@ define void @post-indexed-sub-doubleword-offset-min(i64* %a, i64* %b, i64 %count) nounwind { ; CHECK-LABEL: post-indexed-sub-doubleword-offset-min ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-256 -; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-256 +; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0], #-256 br label %for.body for.body: %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ] @@ -1262,8 +1262,7 @@ ; CHECK-LABEL: post-indexed-doubleword-offset-out-of-range ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}] ; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256 -; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}] -; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256 +; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0], #256 br label %for.body for.body: Index: llvm/test/CodeGen/AArch64/memcpy.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/memcpy.mir @@ -0,0 +1,175 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=aarch64-ldst-opt %s -o - | FileCheck %s + +... 
+--- +name: memcpy32 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$x0', virtual-reg: '' } + - { reg: '$x1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: memcpy32 + ; CHECK: liveins: $x0, $x1 + ; CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16) + ; CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32) + ; CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 32) + ; CHECK: RET undef $lr + renamable $q0 = LDRQui renamable $x1, 1 :: (load 16) + STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 32) + renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32) + STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 32) + RET undef $lr + +... 
+--- +name: memcpy64 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$x0', virtual-reg: '' } + - { reg: '$x1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: memcpy64 + ; CHECK: liveins: $x0, $x1 + ; CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16) + ; CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 64) + ; CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 64) + ; CHECK: RET undef $lr + renamable $q0 = LDRQui renamable $x1, 1 :: (load 16) + STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 64) + renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 64) + STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 64) + RET undef $lr + +... 
+--- +name: memcpy128 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$x0', virtual-reg: '' } + - { reg: '$x1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: memcpy128 + ; CHECK: liveins: $x0, $x1 + ; CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16) + ; CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 128) + ; CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 128) + ; CHECK: RET undef $lr + renamable $q0 = LDRQui renamable $x1, 1 :: (load 16) + STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 128) + renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 128) + STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 128) + RET undef $lr + +...