Index: llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1515,6 +1515,40 @@
   return None;
 }
 
+/// Looks for a register that can be used to rename the register of
+/// \p FirstMI and, if one is found, records it in \p Flags.
+///
+/// \p MaybeCanRename caches the result of canRenameUpToDef() for \p FirstMI;
+/// it is taken by reference so the cached answer is kept by the caller.
+///
+/// \returns true if a rename register was found, in which case \p Flags is
+/// updated with that register and marked as merge-forward. \p Flags is left
+/// untouched otherwise.
+static bool
+regHasBeenRenamed(Optional<bool> &MaybeCanRename, LdStPairFlags &Flags,
+                  MachineInstr &FirstMI, MachineInstr &MI,
+                  LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
+                  SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
+                  const TargetRegisterInfo *TRI) {
+  if (!DebugCounter::shouldExecute(RegRenamingCounter))
+    return false;
+
+  if (!MaybeCanRename)
+    MaybeCanRename = {
+        canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
+  if (!*MaybeCanRename)
+    return false;
+
+  Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
+      FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses, TRI);
+  if (!MaybeRenameReg)
+    return false;
+
+  Flags.setRenameReg(*MaybeRenameReg);
+  Flags.setMergeForward(true);
+  return true;
+}
+
 /// Scan the instructions looking for a load/store that can be combined with the
 /// current instruction into a wider equivalent or a load/store pair.
 MachineBasicBlock::iterator
@@ -1666,12 +1700,41 @@
             continue;
           }
         }
+        // If the load/store pattern has been optimized and reordered
+        // into the following:
+        //   ldr q0, [x1, #16]
+        //   str q0, [x0, #16]
+        //   ldr q0, [x1]
+        //   str q0, [x0]
+        // and the register stored by one store is the same register as, or a
+        // sub/super register of, the one stored by the other, no STP is
+        // formed. Try to rename the register so that the stores can be
+        // recognised as a pair.
+        // TODO: This is currently supported for STPs, LDPs are not
+        // being generated yet, so loads are left to the check below.
+        // Only candidates whose first memory operand is aligned to more than
+        // 16 bytes are considered; a candidate without memory operands is
+        // skipped instead of being indexed out of bounds.
+        // NOTE(review): unlike the bail-out below, this path continues without
+        // recording MI in ModifiedRegUnits/UsedRegUnits/MemInsns - confirm
+        // that this is intended.
+        const bool IsOverAligned =
+            !MI.memoperands_empty() &&
+            (*MI.memoperands_begin())->getAlign() > Align(16);
+        if (!MayLoad && IsOverAligned &&
+            TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
+          if (regHasBeenRenamed(MaybeCanRename, Flags, FirstMI, MI, DefinedInBB,
+                                UsedInBetween, RequiredClasses, TRI)) {
+            MBBIWithRenameReg = MBBI;
+            continue;
+          }
+        }
         // If the destination register of one load is the same register or a
         // sub/super register of the other load, bail and keep looking. A
         // load-pair instruction with both destination registers the same is
         // UNPREDICTABLE and will result in an exception.
         if (MayLoad &&
             TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) {
           LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                             TRI);
           MemInsns.push_back(&MI);
@@ -1714,22 +1777,10 @@
             Flags.clearRenameReg();
             return MBBI;
           }
 
-          if (DebugCounter::shouldExecute(RegRenamingCounter)) {
-            if (!MaybeCanRename)
-              MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween,
-                                                 RequiredClasses, TRI)};
-
-            if (*MaybeCanRename) {
-              Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
-                  FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses,
-                  TRI);
-              if (MaybeRenameReg) {
-                Flags.setRenameReg(*MaybeRenameReg);
-                Flags.setMergeForward(true);
-                MBBIWithRenameReg = MBBI;
-              }
-            }
+          if (regHasBeenRenamed(MaybeCanRename, Flags, FirstMI, MI, DefinedInBB,
+                                UsedInBetween, RequiredClasses, TRI)) {
+            MBBIWithRenameReg = MBBI;
           }
         }
         // Unable to combine these instructions due to interference in between.
Index: llvm/test/CodeGen/AArch64/memcpy.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/memcpy.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -aarch64-load-store-renaming=true | FileCheck %s --check-prefix=CHECK-RENAME
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -aarch64-load-store-renaming=false | FileCheck %s
+
+define dso_local void @memcpy32(i8* nocapture %a, i8* nocapture readonly %b, i32 %n) local_unnamed_addr #0 {
+; CHECK-RENAME-LABEL: memcpy32:
+; CHECK-RENAME:       // %bb.0: // %entry
+; CHECK-RENAME-NEXT:    ldr q1, [x1, #16]
+; CHECK-RENAME-NEXT:    ldr q0, [x1]
+; CHECK-RENAME-NEXT:    stp q0, q1, [x0]
+; CHECK-RENAME-NEXT:    ret
+;
+; CHECK-LABEL: memcpy32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr q0, [x1, #16]
+; CHECK-NEXT:    str q0, [x0, #16]
+; CHECK-NEXT:    ldr q0, [x1]
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+entry: ; 32-byte copy; both pointers are declared align 32
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 32 dereferenceable(32) %a, i8* noundef nonnull align 32 dereferenceable(32) %b, i64 32, i1 false)
+  ret void
+}
+
+define void @memcpy64(i8* nocapture %a, i8* nocapture readonly %b, i32 %n) local_unnamed_addr #0 {
+; CHECK-RENAME-LABEL: memcpy64:
+; CHECK-RENAME:       // %bb.0: // %entry
+; CHECK-RENAME-NEXT:    ldr q1, [x1, #16]
+; CHECK-RENAME-NEXT:    ldr q0, [x1]
+; CHECK-RENAME-NEXT:    stp q0, q1, [x0]
+; CHECK-RENAME-NEXT:    ret
+;
+; CHECK-LABEL: memcpy64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr q0, [x1, #16]
+; CHECK-NEXT:    str q0, [x0, #16]
+; CHECK-NEXT:    ldr q0, [x1]
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+entry: ; still a 32-byte copy; 64 is the declared pointer alignment
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 64 dereferenceable(32) %a, i8* noundef nonnull align 64 dereferenceable(32) %b, i64 32, i1 false)
+  ret void
+}
+
+define void @memcpy128(i8* nocapture %a, i8* nocapture readonly %b, i32 %n) local_unnamed_addr #0 {
+; CHECK-RENAME-LABEL: memcpy128:
+; CHECK-RENAME:       // %bb.0: // %entry
+; CHECK-RENAME-NEXT:    ldr q1, [x1, #16]
+; CHECK-RENAME-NEXT:    ldr q0, [x1]
+; CHECK-RENAME-NEXT:    stp q0, q1, [x0]
+; CHECK-RENAME-NEXT:    ret
+;
+; CHECK-LABEL: memcpy128:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr q0, [x1, #16]
+; CHECK-NEXT:    str q0, [x0, #16]
+; CHECK-NEXT:    ldr q0, [x1]
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
+entry: ; still a 32-byte copy; 128 is the declared pointer alignment
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 128 dereferenceable(32) %a, i8* noundef nonnull align 128 dereferenceable(32) %b, i64 32, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 ; intrinsic called by the three functions above
Index: llvm/test/CodeGen/AArch64/memcpy.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/memcpy.mir
@@ -0,0 +1,175 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=aarch64-ldst-opt %s -o - | FileCheck %s
+
+...
+---
+name: memcpy32
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins:
+  - { reg: '$x0', virtual-reg: '' }
+  - { reg: '$x1', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 1
+  adjustsStack: false
+  hasCalls: false
+  stackProtector: ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+  hasTailCall: false
+  localFrameSize: 0
+  savePoint: ''
+  restorePoint: ''
+fixedStack: []
+stack: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+  hasRedZone: false
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: memcpy32
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16)
+    ; CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32)
+    ; CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 32)
+    ; CHECK: RET undef $lr
+    renamable $q0 = LDRQui renamable $x1, 1 :: (load 16) ; first load into $q0, from $x1 with immediate 1
+    STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 32) ; stores that value; the expected output folds this store into the STPQi
+    renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32) ; second load reuses $q0
+    STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 32) ; second store; the expected output pairs both stores, with the first value renamed to $q1
+    RET undef $lr
+
+...
+---
+name: memcpy64
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins:
+  - { reg: '$x0', virtual-reg: '' }
+  - { reg: '$x1', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 1
+  adjustsStack: false
+  hasCalls: false
+  stackProtector: ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+  hasTailCall: false
+  localFrameSize: 0
+  savePoint: ''
+  restorePoint: ''
+fixedStack: []
+stack: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+  hasRedZone: false
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: memcpy64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16)
+    ; CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 64)
+    ; CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 64)
+    ; CHECK: RET undef $lr
+    renamable $q0 = LDRQui renamable $x1, 1 :: (load 16) ; first load into $q0, from $x1 with immediate 1
+    STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 64) ; stores that value; the expected output folds this store into the STPQi
+    renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 64) ; second load reuses $q0
+    STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 64) ; second store; the expected output pairs both stores, with the first value renamed to $q1
+    RET undef $lr
+
+...
+---
+name: memcpy128
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers: []
+liveins:
+  - { reg: '$x0', virtual-reg: '' }
+  - { reg: '$x1', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 1
+  adjustsStack: false
+  hasCalls: false
+  stackProtector: ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+  hasTailCall: false
+  localFrameSize: 0
+  savePoint: ''
+  restorePoint: ''
+fixedStack: []
+stack: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+  hasRedZone: false
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: memcpy128
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16)
+    ; CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 128)
+    ; CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 128)
+    ; CHECK: RET undef $lr
+    renamable $q0 = LDRQui renamable $x1, 1 :: (load 16) ; first load into $q0, from $x1 with immediate 1
+    STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 128) ; stores that value; the expected output folds this store into the STPQi
+    renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 128) ; second load reuses $q0
+    STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 128) ; second store; the expected output pairs both stores, with the first value renamed to $q1
+    RET undef $lr
+
+...