Index: llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -736,6 +736,16 @@ } } +static bool isRewritableImplicitDef(unsigned Opc) { + switch (Opc) { + default: + return false; + case AArch64::ORRWrs: + case AArch64::ADDWri: + return true; + } +} + MachineBasicBlock::iterator AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I, MachineBasicBlock::iterator MergeMI, @@ -871,12 +881,13 @@ // Return the sub/super register for RenameReg, matching the size of // OriginalReg. - auto GetMatchingSubReg = [this, - RenameReg](MCPhysReg OriginalReg) -> MCPhysReg { - for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg)) - if (TRI->getMinimalPhysRegClass(OriginalReg) == - TRI->getMinimalPhysRegClass(SubOrSuper)) + auto GetMatchingSubReg = + [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg { + for (MCPhysReg SubOrSuper : + TRI->sub_and_superregs_inclusive(*RenameReg)) { + if (C->contains(SubOrSuper)) return SubOrSuper; + } llvm_unreachable("Should have found matching sub or super register!"); }; @@ -884,7 +895,8 @@ [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) { if (IsDef) { bool SeenDef = false; - for (auto &MOP : MI.operands()) { + for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) { + MachineOperand &MOP = MI.getOperand(OpIdx); // Rename the first explicit definition and all implicit // definitions matching RegToRename. 
if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() && @@ -893,18 +905,33 @@ assert((MOP.isImplicit() || (MOP.isRenamable() && !MOP.isEarlyClobber())) && "Need renamable operands"); - MOP.setReg(GetMatchingSubReg(MOP.getReg())); + Register MatchingReg; + if (const TargetRegisterClass *RC = + MI.getRegClassConstraint(OpIdx, TII, TRI)) + MatchingReg = GetMatchingSubReg(RC); + else { + if (!isRewritableImplicitDef(MI.getOpcode())) + continue; + MatchingReg = GetMatchingSubReg( + TRI->getMinimalPhysRegClass(MOP.getReg())); + } + MOP.setReg(MatchingReg); SeenDef = true; } } } else { - for (auto &MOP : MI.operands()) { + for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) { + MachineOperand &MOP = MI.getOperand(OpIdx); if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() && TRI->regsOverlap(MOP.getReg(), RegToRename)) { assert((MOP.isImplicit() || (MOP.isRenamable() && !MOP.isEarlyClobber())) && "Need renamable operands"); - MOP.setReg(GetMatchingSubReg(MOP.getReg())); + const TargetRegisterClass *RC = + MI.getRegClassConstraint(OpIdx, TII, TRI); + if (!RC) + continue; + MOP.setReg(GetMatchingSubReg(RC)); } } } @@ -1386,6 +1413,16 @@ << MOP << ")\n"); return false; } + + // We cannot rename arbitrary implicit-defs; the specific rule to rewrite + // them must be known. For example, in ORRWrs the implicit-def + // corresponds to the result register. + if (MOP.isImplicit() && MOP.isDef()) { + if (!isRewritableImplicitDef(MOP.getParent()->getOpcode())) + return false; + return TRI->isSuperOrSubRegisterEq( + MOP.getParent()->getOperand(0).getReg(), MOP.getReg()); + } } return MOP.isImplicit() || (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied()); @@ -1492,10 +1529,9 @@ // required register classes. 
auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) { return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) { - return any_of(TRI->sub_and_superregs_inclusive(PR), - [C, TRI](MCPhysReg SubOrSuper) { - return C == TRI->getMinimalPhysRegClass(SubOrSuper); - }); + return any_of( + TRI->sub_and_superregs_inclusive(PR), + [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); }); }); }; Index: llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll +++ llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll @@ -14,9 +14,9 @@ ; CHECK-NEXT: stp w6, w5, [sp, #36] ; CHECK-NEXT: str w7, [sp, #32] ; CHECK-NEXT: str w8, [x0] -; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: ldr w0, [sp, #72] ; CHECK-NEXT: ldr w8, [sp, #80] -; CHECK-NEXT: stp w8, w9, [sp, #16] +; CHECK-NEXT: stp w8, w0, [sp, #16] ; CHECK-NEXT: add x8, sp, #72 ; =72 ; CHECK-NEXT: add x8, x8, #24 ; =24 ; CHECK-NEXT: str x8, [sp, #24] @@ -64,18 +64,18 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; =96 ; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill -; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: mov w0, #1 ; CHECK-NEXT: mov w8, #2 -; CHECK-NEXT: stp w8, w9, [sp, #72] -; CHECK-NEXT: mov w9, #3 +; CHECK-NEXT: stp w8, w0, [sp, #72] +; CHECK-NEXT: mov w0, #3 ; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: stp w8, w9, [sp, #64] -; CHECK-NEXT: mov w9, #5 +; CHECK-NEXT: stp w8, w0, [sp, #64] +; CHECK-NEXT: mov w0, #5 ; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: stp w8, w9, [sp, #56] -; CHECK-NEXT: mov w9, #7 +; CHECK-NEXT: stp w8, w0, [sp, #56] +; CHECK-NEXT: mov w0, #7 ; CHECK-NEXT: mov w8, #8 -; CHECK-NEXT: stp w8, w9, [sp, #48] +; CHECK-NEXT: stp w8, w0, [sp, #48] ; CHECK-NEXT: mov w8, #9 ; CHECK-NEXT: mov w9, #10 ; CHECK-NEXT: stp w9, w8, [sp, #40] Index: llvm/test/CodeGen/AArch64/machine-outliner.ll =================================================================== --- 
llvm/test/CodeGen/AArch64/machine-outliner.ll +++ llvm/test/CodeGen/AArch64/machine-outliner.ll @@ -92,15 +92,15 @@ ; ODR: [[OUTLINED]]: ; CHECK: .p2align 2 ; CHECK-NEXT: [[OUTLINED]]: -; CHECK: mov w9, #1 +; CHECK: mov w0, #1 ; CHECK-DAG: mov w8, #2 -; CHECK-DAG: stp w8, w9, [sp, #24] -; CHECK-DAG: mov w9, #3 +; CHECK-DAG: stp w8, w0, [sp, #24] +; CHECK-DAG: mov w0, #3 ; CHECK-DAG: mov w8, #4 -; CHECK-DAG: stp w8, w9, [sp, #16] -; CHECK-DAG: mov w9, #5 +; CHECK-DAG: stp w8, w0, [sp, #16] +; CHECK-DAG: mov w0, #5 ; CHECK-DAG: mov w8, #6 -; CHECK-DAG: stp w8, w9, [sp, #8] +; CHECK-DAG: stp w8, w0, [sp, #8] ; CHECK-DAG: add sp, sp, #32 ; CHECK-DAG: ret Index: llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir =================================================================== --- llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir +++ llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir @@ -20,12 +20,12 @@ # CHECK-LABEL: name: test_dbg_value1 # CHECK: bb.0: # CHECK-NEXT: liveins: $x0, $x1 -# CHECK: $x10, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) +# CHECK: $x2, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) # CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) # CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) # CHECK-NEXT: DBG_VALUE $x9, $noreg # CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8 -# CHECK-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store (s64), align 4) +# CHECK-NEXT: STPXi renamable $x8, killed $x2, renamable $x0, 10 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr name: test_dbg_value1 alignment: 4 Index: llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir =================================================================== --- llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir +++ llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir @@ -1,25 +1,26 @@ -# RUN: llc -run-pass=aarch64-ldst-opt 
-aarch64-load-store-renaming=true -mattr=+reserve-x10 \ -# RUN: -mattr=+reserve-x11 -mattr=+reserve-x15 -mtriple=arm64-apple-iphoneos -verify-machineinstrs \ +# RUN: llc -run-pass=aarch64-ldst-opt -aarch64-load-store-renaming=true \ +# RUN: -mattr=+reserve-x2,+reserve-x15 \ +# RUN: -mtriple=arm64-apple-iphoneos -verify-machineinstrs \ # RUN: -o - %s | FileCheck --check-prefix=CHECK --check-prefix=PRESERVED %s # RUN: llc -run-pass=aarch64-ldst-opt -aarch64-load-store-renaming=true -mtriple=arm64-apple-iphoneos \ # RUN: -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=NOPRES %s -# Make sure we do not pick reserved registers. For test1, we would pick x10, +# Make sure we do not pick reserved registers. For test1, we would pick x2, # and for test2 we would pick x15, both of which are reserved. # --- # CHECK-LABEL: name: test1 # CHECK: bb.0: # CHECK-NEXT: liveins: $x0, $x1 -# PRESERVED: $x12, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) -# NOPRES: $x10, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) +# PRESERVED: $x3, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) +# NOPRES: $x2, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) # CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) # CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) # CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8 -# PRESERVED-NEXT: STPXi renamable $x8, killed $x12, renamable $x0, 10 :: (store (s64), align 4) -# NOPRES-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store (s64), align 4) +# PRESERVED-NEXT: STPXi renamable $x8, killed $x3, renamable $x0, 10 :: (store (s64), align 4) +# NOPRES-NEXT: STPXi renamable $x8, killed $x2, renamable $x0, 10 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr name: test1 @@ -47,7 +48,7 @@ ... 
# CHECK-LABEL: name: test2 # CHECK: bb.0: -# CHECK-NEXT: liveins: $x0, $x1, $x10, $x11, $x12, $x13 +# CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13 # CHECK: renamable $w19 = LDRWui renamable $x0, 0 :: (load (s64)) # PRESERVED-NEXT: $x18, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) # NOPRES-NEXT: $x15, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) @@ -73,7 +74,7 @@ machineFunctionInfo: {} body: | bb.0: - liveins: $x0, $x1, $x10, $x11, $x12, $x13 + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13 renamable $w19 = LDRWui renamable $x0, 0 :: (load (s64)) renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) Index: llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir =================================================================== --- llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir +++ llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir @@ -10,11 +10,11 @@ # CHECK-LABEL: name: test1 # CHECK: bb.0: # CHECK-NEXT: liveins: $x0, $x1 -# CHECK: $x10, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) +# CHECK: $x2, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) # CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) # CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) # CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8 -# CHECK-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store (s64), align 4) +# CHECK-NEXT: STPXi renamable $x8, killed $x2, renamable $x0, 10 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr name: test1 @@ -45,11 +45,11 @@ # CHECK-LABEL: bb.0: # CHECK-NEXT: liveins: $x0, $x9, $x1 -# CHECK: $x10, renamable $x8 = LDPXi renamable $x9, 0 :: (load (s64)) +# CHECK: $x2, renamable $x8 = LDPXi renamable $x9, 0 :: (load (s64)) # CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load (s64)) # CHECK-NEXT: STRXui 
renamable $x9, renamable $x0, 100 :: (store (s64), align 4) # CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8 -# CHECK-NEXT: STPXi renamable $x8, killed $x10, renamable $x0, 10 :: (store (s64), align 4) +# CHECK-NEXT: STPXi renamable $x8, killed $x2, renamable $x0, 10 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr name: test2 @@ -114,9 +114,9 @@ # CHECK-LABEL: bb.0: # CHECK-NEXT: liveins: $x0, $x1 -# CHECK: $x9 = MRS 58880 +# CHECK: $x2 = MRS 58880 # CHECK-NEXT: renamable $x8 = MRS 55840 -# CHECK-NEXT: STPXi $x9, killed renamable $x8, killed renamable $x0, 0 :: (store (s32)) +# CHECK-NEXT: STPXi $x2, killed renamable $x8, killed renamable $x0, 0 :: (store (s32)) # CHECK-NEXT: RET undef $lr name: test4 @@ -146,9 +146,9 @@ # CHECK-LABEL: bb.0: # CHECK-NEXT: liveins: $x0, $x1 -# CHECK: $x9 = MRS 58880 +# CHECK: $x2 = MRS 58880 # CHECK-NEXT: renamable $x8 = MRS 55840 -# CHECK-NEXT: STPWi $w9, killed renamable $w8, killed renamable $x0, 0 :: (store (s32)) +# CHECK-NEXT: STPWi $w2, killed renamable $w8, killed renamable $x0, 0 :: (store (s32)) # CHECK-NEXT: RET undef $lr name: test5 @@ -255,9 +255,9 @@ # CHECK-NEXT: liveins: $x0, $x1 # CHECK: renamable $x8 = MRS 58880 -# CHECK-NEXT: $w9 = ORRWrs $wzr, killed renamable $w8, 0, implicit-def $x9 +# CHECK-NEXT: $w2 = ORRWrs $wzr, killed renamable $w8, 0, implicit-def $x2 # CHECK-NEXT: renamable $x8 = MRS 55840 -# CHECK-NEXT: STPWi $w9, killed renamable $w8, killed renamable $x0, 0 :: (store (s32)) +# CHECK-NEXT: STPWi $w2, killed renamable $w8, killed renamable $x0, 0 :: (store (s32)) # CHECK-NEXT: RET undef $lr name: test8 @@ -361,7 +361,7 @@ # ($x14 in this example) # CHECK-LABEL: name: test11 # CHECK: bb.0: -# CHECK-NEXT: liveins: $x0, $x1, $x11, $x12, $x13 +# CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x11, $x12, $x13 # CHECK: renamable $w10 = LDRWui renamable $x0, 0 :: (load (s64)) # CHECK-NEXT: $x15, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) @@ -386,7 +386,7 @@ machineFunctionInfo: {} 
body: | bb.0: - liveins: $x0, $x1, $x11, $x12, $x13 + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x11, $x12, $x13 renamable $w10 = LDRWui renamable $x0, 0 :: (load (s64)) renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) @@ -407,10 +407,10 @@ # CHECK-NEXT: liveins: $x0, $x1 # # CHECK: renamable $x10 = LDRXui renamable $x0, 0 :: (load (s64)) -# CHECK-NEXT: $x11, renamable $x8 = LDPXi renamable $x0, 3 :: (load (s64)) +# CHECK-NEXT: $x2, renamable $x8 = LDPXi renamable $x0, 3 :: (load (s64)) # CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 2 :: (load (s64)) # CHECK-NEXT: renamable $x8 = ADDXrr $x8, $x8 -# CHECK-NEXT: STPXi renamable $x8, killed $x11, renamable $x0, 10 :: (store (s64), align 4) +# CHECK-NEXT: STPXi renamable $x8, killed $x2, renamable $x0, 10 :: (store (s64), align 4) # CHECK-NEXT: STPXi killed renamable $x10, renamable $x9, renamable $x0, 20 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr @@ -444,7 +444,7 @@ # paired store. 
($x14 in this example) # CHECK-LABEL: name: test13 # CHECK: bb.0: -# CHECK-NEXT: liveins: $x0, $x1, $x10, $x11, $x12, $x13 +# CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x10, $x11, $x12, $x13 # CHECK: $x15, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) # CHECK-NEXT: renamable $x14 = LDRXui renamable $x0, 4 :: (load (s64)) # CHECK-NEXT: STRXui killed renamable $x14, renamable $x0, 100 :: (store (s64), align 4) @@ -465,7 +465,7 @@ machineFunctionInfo: {} body: | bb.0: - liveins: $x0, $x1, $x10, $x11, $x12, $x13 + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x10, $x11, $x12, $x13 renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) renamable $x14 = LDRXui renamable $x0, 4 :: (load (s64)) STRXui renamable killed $x14, renamable $x0, 100 :: (store (s64), align 4) @@ -514,11 +514,11 @@ # CHECK-LABEL: name: test15_undef_op # CHECK: bb.0: # CHECK-NEXT: liveins: $x0, $x1, $x8 -# CHECK: undef renamable $x10, $x11 = LDPXi renamable $x0, 0 :: (load (s64)) +# CHECK: undef renamable $x2, $x3 = LDPXi renamable $x0, 0 :: (load (s64)) # CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) # CHECK-NEXT: STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) -# CHECK-NEXT: renamable $x10 = ADDXrr $x10, $x10 -# CHECK-NEXT: STPXi renamable $x10, killed $x11, renamable $x0, 10 :: (store (s64), align 4) +# CHECK-NEXT: renamable $x2 = ADDXrr $x2, $x2 +# CHECK-NEXT: STPXi renamable $x2, killed $x3, renamable $x0, 10 :: (store (s64), align 4) # CHECK-NEXT: RET undef $lr # name: test15_undef_op @@ -535,12 +535,12 @@ body: | bb.0: liveins: $x0, $x1, $x8 - renamable undef $x10, renamable $x9 = LDPXi renamable $x0, 0 :: (load (s64)) + renamable undef $x2, renamable $x9 = LDPXi renamable $x0, 0 :: (load (s64)) STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) renamable $x9 = LDRXui renamable $x0, 1 :: (load (s64)) STRXui renamable $x9, renamable $x0, 100 :: (store (s64), align 4) - renamable $x10 = 
ADDXrr $x10, $x10 - STRXui renamable $x10, renamable $x0, 10 :: (store (s64), align 4) + renamable $x2 = ADDXrr $x2, $x2 + STRXui renamable $x2, renamable $x0, 10 :: (store (s64), align 4) RET undef $lr ...