diff --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp --- a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp @@ -97,14 +97,25 @@ const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC, SubRegMap &SubRegs) const; - /// Try to find register class containing registers of minimal size for a - /// given register class RC and used subregs as keys in SubRegs by shifting - /// offsets of the subregs by RShift value to the right. If found return the - /// resulting regclass and new shifted subregs as values in SubRegs map. - /// If CoverSubregIdx isn't null it specifies covering subreg. + /// Given regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs try to + /// find new regclass such that: + /// 1. It has subregs obtained by shifting each OldSubReg by RShift number + /// of bits to the right. Every "shifted" subreg should have the same + /// SubRegRC. SubRegRC can be null, in this case it is initialized using + /// getSubRegisterClass. If CoverSubregIdx is not zero it's a subreg that + /// "covers" all other subregs in pairs. Basically such subreg becomes a + /// whole register. + /// 2. Resulting register class contains registers of minimal size but not + /// less than RegNumBits. + /// + /// SubRegs is a map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out + /// parameter: + /// OldSubReg - input parameter, + /// SubRegRC - in/out, should be changed for unknown regclass, + /// NewSubReg - output, contains shifted subregs on return. 
const TargetRegisterClass * getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift, - unsigned CoverSubregIdx, + unsigned RegNumBits, unsigned CoverSubregIdx, SubRegMap &SubRegs) const; /// Update live intervals after rewriting OldReg to NewReg with SubRegs map @@ -207,8 +218,8 @@ const TargetRegisterClass * GCNRewritePartialRegUses::getRegClassWithShiftedSubregs( - const TargetRegisterClass *RC, unsigned RShift, unsigned CoverSubregIdx, - SubRegMap &SubRegs) const { + const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits, + unsigned CoverSubregIdx, SubRegMap &SubRegs) const { unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC); LLVM_DEBUG(dbgs() << " Shift " << RShift << ", reg align " << RCAlign @@ -218,7 +229,13 @@ for (auto &[OldSubReg, SRI] : SubRegs) { auto &[SubRegRC, NewSubReg] = SRI; - // Instruction operand may not specify required register class (ex. COPY). + // Register class may be unknown, for example: + // undef %0.sub4:sgpr_1024 = S_MOV_B32 01 + // %0.sub5:sgpr_1024 = S_MOV_B32 02 + // %1:vreg_64 = COPY %0.sub4_sub5 + // Register classes for subregs 'sub4' and 'sub5' are known from the + // description of destination operand of S_MOV_B32 instruction but the + // class for the subreg 'sub4_sub5' isn't specified by the COPY instruction. if (!SubRegRC) SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg); @@ -256,21 +273,26 @@ // ClassMask is the set of all register classes such that each class is // allocatable, aligned, has all shifted subregs and each subreg has required // register class (see SubRegRC above). Now select first (that is largest) - // register class with registers of minimal size. + // register class with registers of minimal but not less than RegNumBits size. + // We have to check register size because we may encounter classes of smaller + // registers like VReg_1 in some situations. 
const TargetRegisterClass *MinRC = nullptr; unsigned MinNumBits = std::numeric_limits<unsigned>::max(); for (unsigned ClassID : ClassMask.set_bits()) { auto *RC = TRI->getRegClass(ClassID); unsigned NumBits = TRI->getRegSizeInBits(*RC); - if (NumBits < MinNumBits) { + if (NumBits < MinNumBits && NumBits >= RegNumBits) { MinNumBits = NumBits; MinRC = RC; } + if (MinNumBits == RegNumBits) + break; } #ifndef NDEBUG if (MinRC) { assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign)); for (auto [SubReg, SRI] : SubRegs) + // Check that all registers in MinRC support SRI.SubReg subregister. assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg)); } #endif @@ -302,7 +324,8 @@ // If covering subreg is found shift everything so the covering subreg would // be in the rightmost position. if (CoverSubreg != AMDGPU::NoSubRegister) - return getRegClassWithShiftedSubregs(RC, Offset, CoverSubreg, SubRegs); + return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg, + SubRegs); // Otherwise find subreg with maximum required alignment and shift it and all // other subregs to the rightmost possible position with respect to the @@ -328,7 +351,7 @@ llvm_unreachable("misaligned subreg"); unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg; - return getRegClassWithShiftedSubregs(RC, RShift, 0, SubRegs); + return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs); } // Only the subrange's lanemasks of the original interval need to be modified. @@ -406,6 +429,10 @@ return false; } + auto *RC = MRI->getRegClass(Reg); + LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI) + << ':' << TRI->getRegClassName(RC) << '\n'); + // Collect used subregs and constrained reg classes infered from instruction // operands. 
SubRegMap SubRegs; @@ -413,14 +440,15 @@ assert(MO.getSubReg() != AMDGPU::NoSubRegister); auto *OpDescRC = getOperandRegClass(MO); const auto [I, Inserted] = SubRegs.try_emplace(MO.getSubReg(), OpDescRC); - if (!Inserted) { + if (!Inserted && OpDescRC) { SubRegInfo &SRI = I->second; - SRI.RC = TRI->getCommonSubClass(SRI.RC, OpDescRC); + SRI.RC = SRI.RC ? TRI->getCommonSubClass(SRI.RC, OpDescRC) : OpDescRC; + if (!SRI.RC) { + LLVM_DEBUG(dbgs() << " Couldn't find common target regclass\n"); + return false; + } } } - auto *RC = MRI->getRegClass(Reg); - LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI) - << ':' << TRI->getRegClassName(RC) << '\n'); auto *NewRC = getMinSizeReg(RC, SubRegs); if (!NewRC) { diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir --- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir @@ -4341,9 +4341,9 @@ body: | bb.0: ; CHECK-LABEL: name: test_sgpr_64_w32 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]] - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]] undef %0.sub0:sgpr_64 = S_MOV_B32 00 S_NOP 0, implicit %0.sub0 @@ -4358,11 +4358,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_sgpr_96_w32 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]] - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]] - ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 22 + ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 22 ; 
CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]] undef %0.sub0:sgpr_96 = S_MOV_B32 00 S_NOP 0, implicit %0.sub0 @@ -4381,11 +4381,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_sgpr_128_w32 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]] - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]] - ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 23 + ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]] undef %0.sub0:sgpr_128 = S_MOV_B32 00 S_NOP 0, implicit %0.sub0 @@ -4425,11 +4425,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_sgpr_160_w32 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]] - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]] - ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 24 + ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 24 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]] undef %0.sub0:sgpr_160 = S_MOV_B32 00 S_NOP 0, implicit %0.sub0 @@ -4450,11 +4450,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_sgpr_192_w32 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]] - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]] - ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 25 + ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 25 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]] undef 
%0.sub0:sgpr_192 = S_MOV_B32 00 S_NOP 0, implicit %0.sub0 @@ -4503,11 +4503,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_sgpr_224_w32 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]] - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]] - ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 26 + ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 26 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]] undef %0.sub0:sgpr_224 = S_MOV_B32 00 S_NOP 0, implicit %0.sub0 @@ -4530,11 +4530,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_sgpr_256_w32 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]] - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]] - ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 27 + ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 27 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]] undef %0.sub0:sgpr_256 = S_MOV_B32 00 S_NOP 0, implicit %0.sub0 @@ -4612,11 +4612,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_sgpr_288_w32 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]] - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]] - ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 28 + ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 28 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]] undef %0.sub0:sgpr_288 = S_MOV_B32 00 S_NOP 0, implicit 
%0.sub0 @@ -4672,11 +4672,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_sgpr_320_w32 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]] - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]] - ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 29 + ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 29 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]] undef %0.sub0:sgpr_320 = S_MOV_B32 00 S_NOP 0, implicit %0.sub0 @@ -4763,11 +4763,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_sgpr_352_w32 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]] - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]] - ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 210 + ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 210 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]] undef %0.sub0:sgpr_352 = S_MOV_B32 00 S_NOP 0, implicit %0.sub0 @@ -4791,11 +4791,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_sgpr_384_w32 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]] - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]] - ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 211 + ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 211 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]] undef %0.sub0:sgpr_384 = S_MOV_B32 00 S_NOP 0, implicit %0.sub0 @@ -4929,11 +4929,11 @@ body: | bb.0: ; 
CHECK-LABEL: name: test_sgpr_512_w32 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]] - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]] - ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 215 + ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 215 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]] undef %0.sub0:sgpr_512 = S_MOV_B32 00 S_NOP 0, implicit %0.sub0 @@ -5086,11 +5086,11 @@ body: | bb.0: ; CHECK-LABEL: name: test_sgpr_1024_w32 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]] - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]] - ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 231 + ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 231 ; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]] undef %0.sub0:sgpr_1024 = S_MOV_B32 00 S_NOP 0, implicit %0.sub0 diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir --- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir @@ -52,3 +52,39 @@ S_NOP 0, implicit %4.sub1_sub2_sub3_sub4_sub5_sub6 S_NOP 0, implicit %4.sub3_sub4_sub5_sub6_sub7_sub8 ... 
+--- +name: test_subregs_unknown_regclass_from_instructions +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: test_subregs_unknown_regclass_from_instructions + ; CHECK: undef %2.sub0:sgpr_64 = S_MOV_B32 1 + ; CHECK-NEXT: %2.sub1:sgpr_64 = S_MOV_B32 2 + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_64 = COPY %2 + undef %0.sub4:sgpr_1024 = S_MOV_B32 01 + %0.sub5:sgpr_1024 = S_MOV_B32 02 + %1:vreg_64 = COPY %0.sub4_sub5 +... +--- +name: test_subregs_unknown_regclass_from_instructions_sgpr_1024_to_sgpr_64 +tracksRegLiveness: true +registers: + - { id: 0, class: sgpr_1024 } +body: | + bb.0: + ; CHECK-LABEL: name: test_subregs_unknown_regclass_from_instructions_sgpr_1024_to_sgpr_64 + ; CHECK: dead [[COPY:%[0-9]+]]:vreg_64 = COPY undef %2:sgpr_64 + %1:vreg_64 = COPY undef %0.sub4_sub5 +... +--- +name: test_subregs_regclass_defined_by_dst_operand_sreg_64_xexec +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: test_subregs_regclass_defined_by_dst_operand_sreg_64_xexec + ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0 + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] + undef %0.sub2_sub3:sgpr_128 = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0 + %2:vreg_64 = COPY %0.sub2_sub3:sgpr_128 +... +