diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1515,6 +1515,32 @@ return None; } +// Returns a boolean that represents whether there exists a register +// from FirstMI to the beginning of the block that can be renamed. If +// one exists, we update Flags with its value. +static bool updateFlagsWithRenameReg( + Optional MaybeCanRename, LdStPairFlags &Flags, MachineInstr &FirstMI, + MachineInstr &MI, LiveRegUnits &DefinedInBB, LiveRegUnits &UsedInBetween, + SmallPtrSetImpl &RequiredClasses, + const TargetRegisterInfo *TRI) { + if (DebugCounter::shouldExecute(RegRenamingCounter)) { + if (!MaybeCanRename) + MaybeCanRename = { + canRenameUpToDef(FirstMI, UsedInBetween, RequiredClasses, TRI)}; + + if (*MaybeCanRename) { + Optional MaybeRenameReg = tryToFindRegisterToRename( + FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses, TRI); + if (MaybeRenameReg) { + Flags.setRenameReg(*MaybeRenameReg); + Flags.setMergeForward(true); + return true; + } + } + } + return false; +} + /// Scan the instructions looking for a load/store that can be combined with the /// current instruction into a wider equivalent or a load/store pair. MachineBasicBlock::iterator @@ -1666,6 +1692,27 @@ continue; } } + // If the load/store pattern has been optimized and reordered + // into the following: + // ldr q0, [x1, #16] + // str q0, [x0, #16] + // ldr q0, [x1] + // str q0, [x0] + // and the destination register of the load/store instruction is + // the same register as or a sub/super register of the other + // load/store, it will not generate an LDP/STP, so we attempt to + // rename the register so that it can be recognised as a pair. + // TODO: This is currently supported for STPs, LDPs are not + // being generated yet. + if (TRI->isSuperOrSubRegisterEq(Reg, getLdStRegOp(MI).getReg())) { + bool flagsHaveRenameReg = updateFlagsWithRenameReg( + MaybeCanRename, Flags, FirstMI, MI, DefinedInBB, UsedInBetween, + RequiredClasses, TRI); + if (flagsHaveRenameReg) { + MBBIWithRenameReg = MBBI; + continue; + } + } // If the destination register of one load is the same register or a // sub/super register of the other load, bail and keep looking. A // load-pair instruction with both destination registers the same is @@ -1714,22 +1761,11 @@ Flags.clearRenameReg(); return MBBI; } - - if (DebugCounter::shouldExecute(RegRenamingCounter)) { - if (!MaybeCanRename) - MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween, - RequiredClasses, TRI)}; - - if (*MaybeCanRename) { - Optional MaybeRenameReg = tryToFindRegisterToRename( - FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses, - TRI); - if (MaybeRenameReg) { - Flags.setRenameReg(*MaybeRenameReg); - Flags.setMergeForward(true); - MBBIWithRenameReg = MBBI; - } - } + bool flagsHaveRenameReg = updateFlagsWithRenameReg( + MaybeCanRename, Flags, FirstMI, MI, DefinedInBB, UsedInBetween, + RequiredClasses, TRI); + if (flagsHaveRenameReg) { + MBBIWithRenameReg = MBBI; } } // Unable to combine these instructions due to interference in between. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/byval-call.ll @@ -33,38 +33,30 @@ ; CHECK-NEXT: .cfi_offset w28, -16 ; CHECK-NEXT: .cfi_offset w30, -24 ; CHECK-NEXT: .cfi_offset w29, -32 -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: ldr q0, [x0, #16] -; CHECK-NEXT: str q0, [sp, #16] -; CHECK-NEXT: ldr q0, [x0, #32] -; CHECK-NEXT: str q0, [sp, #32] +; CHECK-NEXT: stp q1, q0, [sp] +; CHECK-NEXT: ldr q1, [x0, #32] ; CHECK-NEXT: ldr q0, [x0, #48] -; CHECK-NEXT: str q0, [sp, #48] -; CHECK-NEXT: ldr q0, [x0, #64] -; CHECK-NEXT: str q0, [sp, #64] +; CHECK-NEXT: stp q1, q0, [sp, #32] +; CHECK-NEXT: ldr q1, [x0, #64] ; CHECK-NEXT: ldr q0, [x0, #80] -; CHECK-NEXT: str q0, [sp, #80] -; CHECK-NEXT: ldr q0, [x0, #96] -; CHECK-NEXT: str q0, [sp, #96] +; CHECK-NEXT: stp q1, q0, [sp, #64] +; CHECK-NEXT: ldr q1, [x0, #96] ; CHECK-NEXT: ldr q0, [x0, #112] -; CHECK-NEXT: str q0, [sp, #112] -; CHECK-NEXT: ldr q0, [x0, #128] -; CHECK-NEXT: str q0, [sp, #128] +; CHECK-NEXT: stp q1, q0, [sp, #96] +; CHECK-NEXT: ldr q1, [x0, #128] ; CHECK-NEXT: ldr q0, [x0, #144] -; CHECK-NEXT: str q0, [sp, #144] -; CHECK-NEXT: ldr q0, [x0, #160] -; CHECK-NEXT: str q0, [sp, #160] +; CHECK-NEXT: stp q1, q0, [sp, #128] +; CHECK-NEXT: ldr q1, [x0, #160] ; CHECK-NEXT: ldr q0, [x0, #176] -; CHECK-NEXT: str q0, [sp, #176] -; CHECK-NEXT: ldr q0, [x0, #192] -; CHECK-NEXT: str q0, [sp, #192] +; CHECK-NEXT: stp q1, q0, [sp, #160] +; CHECK-NEXT: ldr q1, [x0, #192] ; CHECK-NEXT: ldr q0, [x0, #208] -; CHECK-NEXT: str q0, [sp, #208] -; CHECK-NEXT: ldr q0, [x0, #224] -; CHECK-NEXT: str q0, [sp, #224] +; CHECK-NEXT: stp q1, q0, [sp, #192] +; CHECK-NEXT: ldr q1, [x0, #224] ; CHECK-NEXT: ldr q0, [x0, #240] -; CHECK-NEXT: str q0, [sp, #240] +; CHECK-NEXT: stp q1, q0, [sp, #224] ; CHECK-NEXT: bl byval_a64i32 ; CHECK-NEXT: ldr x28, [sp, #272] // 8-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/consthoist-gep.ll b/llvm/test/CodeGen/AArch64/consthoist-gep.ll --- a/llvm/test/CodeGen/AArch64/consthoist-gep.ll +++ b/llvm/test/CodeGen/AArch64/consthoist-gep.ll @@ -3,6 +3,8 @@ ; CHECK-NOT: adrp x10, global+332 ; CHECK-NOT: add x10, x10, :lo12:global+332 ; CHECK: adrp x10, global+528 +; CHECK-NEXT: and w12, w8, #0xffffff +; CHECK-NEXT: ldr w8, [x11] ; CHECK-NEXT: add x10, x10, :lo12:global+528 %struct.blam = type { %struct.bar, %struct.bar.0, %struct.wobble, %struct.wombat, i8, i16, %struct.snork.2, %struct.foo, %struct.snork.3, %struct.wobble.4, %struct.quux, [9 x i16], %struct.spam, %struct.zot } diff --git a/llvm/test/CodeGen/AArch64/ldst-opt.ll b/llvm/test/CodeGen/AArch64/ldst-opt.ll --- a/llvm/test/CodeGen/AArch64/ldst-opt.ll +++ b/llvm/test/CodeGen/AArch64/ldst-opt.ll @@ -1117,7 +1117,7 @@ define void @post-indexed-sub-word(i32* %a, i32* %b, i64 %count) nounwind { ; CHECK-LABEL: post-indexed-sub-word ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #-8 -; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #-8 +; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #-4] br label %for.body for.body: %phi1 = phi i32* [ %gep4, %for.body ], [ %b, %0 ] @@ -1141,7 +1141,7 @@ define void @post-indexed-sub-doubleword(i64* %a, i64* %b, i64 %count) nounwind { ; CHECK-LABEL: post-indexed-sub-doubleword ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-16 -; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-16 +; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0, #-8] br label %for.body for.body: %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ] @@ -1165,7 +1165,7 @@ define void @post-indexed-sub-quadword(<2 x i64>* %a, <2 x i64>* %b, i64 %count) nounwind { ; CHECK-LABEL: post-indexed-sub-quadword ; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #-32 -; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #-32 +; CHECK: stp q{{[0-9]+}}, q{{[0-9]+}}, [x0, #-16] br label %for.body for.body: %phi1 = phi <2 x i64>* [ %gep4, %for.body ], [ %b, %0 ] @@ -1189,7 +1189,7 @@ define void @post-indexed-sub-float(float* %a, float* %b, i64 %count) nounwind { ; CHECK-LABEL: post-indexed-sub-float ; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #-8 -; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #-8 +; CHECK: stp s{{[0-9]+}}, s{{[0-9]+}}, [x0, #-4] br label %for.body for.body: %phi1 = phi float* [ %gep4, %for.body ], [ %b, %0 ] @@ -1213,7 +1213,7 @@ define void @post-indexed-sub-double(double* %a, double* %b, i64 %count) nounwind { ; CHECK-LABEL: post-indexed-sub-double ; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #-16 -; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #-16 +; CHECK: stp d{{[0-9]+}}, d{{[0-9]+}}, [x0, #-8] br label %for.body for.body: %phi1 = phi double* [ %gep4, %for.body ], [ %b, %0 ] @@ -1237,7 +1237,7 @@ define void @post-indexed-sub-doubleword-offset-min(i64* %a, i64* %b, i64 %count) nounwind { ; CHECK-LABEL: post-indexed-sub-doubleword-offset-min ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #-256 -; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #-256 +; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0], #-256 br label %for.body for.body: %phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ] @@ -1262,8 +1262,7 @@ ; CHECK-LABEL: post-indexed-doubleword-offset-out-of-range ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}] ; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256 -; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}] -; CHECK: add x{{[0-9]+}}, x{{[0-9]+}}, #256 +; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [x0], #256 br label %for.body for.body: diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir --- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir +++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir @@ -544,3 +544,183 @@ RET undef $lr ... + +# During ISel, the order of load/store pairs can be optimized and changed +# which prevents STPs from being generated. These tests check that an STP +# instruction is generated after register renaming. +... +--- +# +# CHECK-LABEL: name: memcpy32 +# CHECK: liveins: $x0, $x1 +# CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16) +# CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32) +# CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 32) +# CHECK: RET undef $lr +# +name: memcpy32 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$x0', virtual-reg: '' } + - { reg: '$x1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $x0, $x1 + renamable $q0 = LDRQui renamable $x1, 1 :: (load 16) + STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 32) + renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 32) + STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 32) + RET undef $lr + +... +--- +# +# CHECK-LABEL: name: memcpy64 +# CHECK: liveins: $x0, $x1 +# CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16) +# CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 64) +# CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 64) +# CHECK: RET undef $lr +# +name: memcpy64 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$x0', virtual-reg: '' } + - { reg: '$x1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $x0, $x1 + renamable $q0 = LDRQui renamable $x1, 1 :: (load 16) + STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 64) + renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 64) + STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 64) + RET undef $lr + +... +--- +# +# CHECK-LABEL: name: memcpy128 +# CHECK: liveins: $x0, $x1 +# CHECK: $q1 = LDRQui renamable $x1, 1 :: (load 16) +# CHECK: renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 128) +# CHECK: STPQi killed renamable $q0, killed $q1, killed renamable $x0, 0 :: (store 16, align 128) +# CHECK: RET undef $lr +# +name: memcpy128 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$x0', virtual-reg: '' } + - { reg: '$x1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + hasRedZone: false +body: | + bb.0.entry: + liveins: $x0, $x1 + renamable $q0 = LDRQui renamable $x1, 1 :: (load 16) + STRQui killed renamable $q0, renamable $x0, 1 :: (store 16, basealign 128) + renamable $q0 = LDRQui killed renamable $x1, 0 :: (load 16, align 128) + STRQui killed renamable $q0, killed renamable $x0, 0 :: (store 16, align 128) + RET undef $lr