diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -723,6 +723,16 @@ } } +static bool isRewritableImplicitDef(unsigned Opc) { + switch (Opc) { + default: + return false; + case AArch64::ORRWrs: + case AArch64::ADDWri: + return true; + } +} + MachineBasicBlock::iterator AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I, MachineBasicBlock::iterator MergeMI, @@ -871,12 +881,13 @@ // Return the sub/super register for RenameReg, matching the size of // OriginalReg. - auto GetMatchingSubReg = [this, - RenameReg](MCPhysReg OriginalReg) -> MCPhysReg { - for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg)) - if (TRI->getMinimalPhysRegClass(OriginalReg) == - TRI->getMinimalPhysRegClass(SubOrSuper)) + auto GetMatchingSubReg = + [this, RenameReg](const TargetRegisterClass *C) -> MCPhysReg { + for (MCPhysReg SubOrSuper : + TRI->sub_and_superregs_inclusive(*RenameReg)) { + if (C->contains(SubOrSuper)) return SubOrSuper; + } llvm_unreachable("Should have found matching sub or super register!"); }; @@ -884,7 +895,8 @@ [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) { if (IsDef) { bool SeenDef = false; - for (auto &MOP : MI.operands()) { + for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) { + MachineOperand &MOP = MI.getOperand(OpIdx); // Rename the first explicit definition and all implicit // definitions matching RegToRename. if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() && @@ -893,18 +905,33 @@ assert((MOP.isImplicit() || (MOP.isRenamable() && !MOP.isEarlyClobber())) && "Need renamable operands"); - MOP.setReg(GetMatchingSubReg(MOP.getReg())); + Register MatchingReg; + if (const TargetRegisterClass *RC = + MI.getRegClassConstraint(OpIdx, TII, TRI)) + MatchingReg = GetMatchingSubReg(RC); + else { + if (!isRewritableImplicitDef(MI.getOpcode())) + continue; + MatchingReg = GetMatchingSubReg( + TRI->getMinimalPhysRegClass(MOP.getReg())); + } + MOP.setReg(MatchingReg); SeenDef = true; } } } else { - for (auto &MOP : MI.operands()) { + for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); ++OpIdx) { + MachineOperand &MOP = MI.getOperand(OpIdx); if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() && TRI->regsOverlap(MOP.getReg(), RegToRename)) { assert((MOP.isImplicit() || (MOP.isRenamable() && !MOP.isEarlyClobber())) && "Need renamable operands"); - MOP.setReg(GetMatchingSubReg(MOP.getReg())); + const TargetRegisterClass *RC = + MI.getRegClassConstraint(OpIdx, TII, TRI); + if (!RC) + continue; + MOP.setReg(GetMatchingSubReg(RC)); } } } @@ -1404,6 +1431,16 @@ << MOP << ")\n"); return false; } + + // We cannot rename arbitrary implicit-defs, the specific rule to rewrite + // them must be known. For example, in ORRWrs the implicit-def + // corresponds to the result register. + if (MOP.isImplicit() && MOP.isDef()) { + if (!isRewritableImplicitDef(MOP.getParent()->getOpcode())) + return false; + return TRI->isSuperOrSubRegisterEq( + MOP.getParent()->getOperand(0).getReg(), MOP.getReg()); + } } return MOP.isImplicit() || (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied()); @@ -1511,10 +1548,9 @@ // required register classes. auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) { return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) { - return any_of(TRI->sub_and_superregs_inclusive(PR), - [C, TRI](MCPhysReg SubOrSuper) { - return C == TRI->getMinimalPhysRegClass(SubOrSuper); - }); + return any_of( + TRI->sub_and_superregs_inclusive(PR), + [C](MCPhysReg SubOrSuper) { return C->contains(SubOrSuper); }); }); }; diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir --- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir +++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir @@ -47,7 +47,7 @@ ... # CHECK-LABEL: name: test2 # CHECK: bb.0: -# CHECK-NEXT: liveins: $x0, $x1, $x10, $x11, $x12, $x13 +# CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13 # CHECK: renamable $w19 = LDRWui renamable $x0, 0 :: (load (s64)) # PRESERVED-NEXT: renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) # NOPRES-NEXT: renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) @@ -74,7 +74,7 @@ machineFunctionInfo: {} body: | bb.0: - liveins: $x0, $x1, $x10, $x11, $x12, $x13 + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13 renamable $w19 = LDRWui renamable $x0, 0 :: (load (s64)) renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-undef-assert.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-undef-assert.mir --- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-undef-assert.mir +++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-undef-assert.mir @@ -17,13 +17,13 @@ # CHECK-NEXT: liveins: $x0, $x17, $x18 # CHECK: renamable $q13_q14_q15 = LD3Threev16b undef renamable $x17 :: (load (s384) from `ptr undef`, align 64) # CHECK-NEXT: renamable $q23_q24_q25 = LD3Threev16b undef renamable $x18 :: (load (s384) from `ptr undef`, align 64) -# CHECK-NEXT: $q16 = EXTv16i8 renamable $q23, renamable $q23, 8 +# CHECK-NEXT: $q0 = EXTv16i8 renamable $q23, renamable $q23, 8 # CHECK-NEXT: renamable $q20 = EXTv16i8 renamable $q14, renamable $q14, 8 # CHECK-NEXT: STRQui killed renamable $q20, $sp, 4 :: (store (s128)) # CHECK-NEXT: renamable $d6 = ZIP2v8i8 renamable $d23, undef renamable $d16 # CHECK-NEXT: STRDui killed renamable $d6, $sp, 11 :: (store (s64)) # CHECK-NEXT: renamable $q6 = EXTv16i8 renamable $q13, renamable $q13, 8 -# CHECK-NEXT: STPQi killed renamable $q6, killed $q16, $sp, 6 :: (store (s128)) +# CHECK-NEXT: STPQi killed renamable $q6, killed $q0, $sp, 6 :: (store (s128)) # CHECK-NEXT: RET undef $lr --- diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir --- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir +++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir @@ -361,7 +361,7 @@ # ($x14 in this example) # CHECK-LABEL: name: test11 # CHECK: bb.0: -# CHECK-NEXT: liveins: $x0, $x1, $x11, $x12, $x13 +# CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x11, $x12, $x13 # CHECK: renamable $w10 = LDRWui renamable $x0, 0 :: (load (s64)) # CHECK-NEXT: renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) @@ -387,7 +387,7 @@ machineFunctionInfo: {} body: | bb.0: - liveins: $x0, $x1, $x11, $x12, $x13 + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x11, $x12, $x13 renamable $w10 = LDRWui renamable $x0, 0 :: (load (s64)) renamable $x9, renamable $x8 = LDPXi renamable $x0, 1 :: (load (s64)) STRXui renamable killed $x9, renamable $x0, 11 :: (store (s64), align 4) @@ -445,7 +445,7 @@ # paired store. ($x14 in this example) # CHECK-LABEL: name: test13 # CHECK: bb.0: -# CHECK-NEXT: liveins: $x0, $x1, $x10, $x11, $x12, $x13 +# CHECK-NEXT: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x10, $x11, $x12, $x13 # CHECK: renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) # CHECK-NEXT: renamable $x14 = LDRXui renamable $x0, 4 :: (load (s64)) # CHECK-NEXT: STRXui killed renamable $x14, renamable $x0, 100 :: (store (s64), align 4) @@ -467,7 +467,7 @@ machineFunctionInfo: {} body: | bb.0: - liveins: $x0, $x1, $x10, $x11, $x12, $x13 + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x10, $x11, $x12, $x13 renamable $x9, renamable $x8 = LDPXi renamable $x0, 0 :: (load (s64)) renamable $x14 = LDRXui renamable $x0, 4 :: (load (s64)) STRXui renamable killed $x14, renamable $x0, 100 :: (store (s64), align 4)