Index: llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1515,6 +1515,32 @@
   return None;
 }
 
+/// Try to find a register that lets \p FirstMI and \p MI be merged forward.
+/// \p MaybeCanRename caches the canRenameUpToDef() result; it is taken by
+/// reference so that the value computed here is kept by the caller.
+/// \returns true if a rename register was found and recorded in \p Flags.
+static bool regHasBeenRenamed(
+    Optional<bool> &MaybeCanRename, LdStPairFlags &Flags, MachineInstr &FirstMI,
+    MachineInstr &MI, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween,
+    SmallPtrSetImpl<const TargetRegisterClass *> &RequiredClasses,
+    const TargetRegisterInfo *TRI) {
+  if (!DebugCounter::shouldExecute(RegRenamingCounter))
+    return false;
+  // Only query canRenameUpToDef() when no earlier answer is cached.
+  if (!MaybeCanRename)
+    MaybeCanRename = {
+        canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)};
+  if (!*MaybeCanRename)
+    return false;
+  Optional<MCPhysReg> MaybeRenameReg = tryToFindRegisterToRename(
+      FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses, TRI);
+  if (!MaybeRenameReg)
+    return false;
+  Flags.setRenameReg(*MaybeRenameReg);
+  Flags.setMergeForward(true);
+  return true;
+}
+
 /// Scan the instructions looking for a load/store that can be combined with the
 /// current instruction into a wider equivalent or a load/store pair.
 MachineBasicBlock::iterator
@@ -1670,8 +1696,24 @@
         // sub/super register of the other load, bail and keep looking. A
         // load-pair instruction with both destination registers the same is
         // UNPREDICTABLE and will result in an exception.
+ ArrayRef MMOs = MI.memoperands(); + MachineMemOperand *MMO = MMOs[0]; + int MIAlign = MMO->getAlign().value(); + + if (TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg()) && + MIAlign > 16) { + bool renamedReg = + regHasBeenRenamed(MaybeCanRename, Flags, FirstMI, MI, DefinedInBB, + UsedInBetween, RequiredClasses, TRI); + if (renamedReg) { + MBBIWithRenameReg = MBBI; + continue; + } + } + if (MayLoad && - TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) { + TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg()) && + MIAlign <= 16) { LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); @@ -1714,22 +1756,11 @@ Flags.clearRenameReg(); return MBBI; } - - if (DebugCounter::shouldExecute(RegRenamingCounter)) { - if (!MaybeCanRename) - MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween, - RequiredClasses, TRI)}; - - if (*MaybeCanRename) { - Optional MaybeRenameReg = tryToFindRegisterToRename( - FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses, - TRI); - if (MaybeRenameReg) { - Flags.setRenameReg(*MaybeRenameReg); - Flags.setMergeForward(true); - MBBIWithRenameReg = MBBI; - } - } + bool renamedReg = + regHasBeenRenamed(MaybeCanRename, Flags, FirstMI, MI, DefinedInBB, + UsedInBetween, RequiredClasses, TRI); + if (renamedReg) { + MBBIWithRenameReg = MBBI; } } // Unable to combine these instructions due to interference in between. 
Index: llvm/test/CodeGen/AArch64/memcpy.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/memcpy.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+; Check the code generated for 32-byte llvm.memcpy calls whose operands are
+; aligned to 32, 64 and 128 bytes: each copy is expected to become two
+; q-register loads followed by a single stp.
+
+define void @memcpy32(i8* nocapture %a, i8* nocapture readonly %b) {
+; CHECK-LABEL: @memcpy32
+; CHECK: %bb.0: // %entry
+; CHECK-NEXT: ldr q1, [x1, #16]
+; CHECK-NEXT: ldr q0, [x1]
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 32 dereferenceable(32) %a, i8* noundef nonnull align 32 dereferenceable(32) %b, i64 32, i1 false)
+  ret void
+}
+
+define void @memcpy64(i8* nocapture %a, i8* nocapture readonly %b) {
+; CHECK-LABEL: @memcpy64
+; CHECK: %bb.0: // %entry
+; CHECK-NEXT: ldr q1, [x1, #16]
+; CHECK-NEXT: ldr q0, [x1]
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 64 dereferenceable(32) %a, i8* noundef nonnull align 64 dereferenceable(32) %b, i64 32, i1 false)
+  ret void
+}
+
+define void @memcpy128(i8* nocapture %a, i8* nocapture readonly %b) {
+; CHECK-LABEL: @memcpy128
+; CHECK: %bb.0: // %entry
+; CHECK-NEXT: ldr q1, [x1, #16]
+; CHECK-NEXT: ldr q0, [x1]
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 128 dereferenceable(32) %a, i8* noundef nonnull align 128 dereferenceable(32) %b, i64 32, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)