diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -702,9 +702,6 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // Keep track of regunit ranges. SmallVector, 8> RU; - // Keep track of subregister ranges. - SmallVector, 4> SRs; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { Register Reg = Register::index2VirtReg(i); @@ -724,14 +721,6 @@ continue; RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end))); } - - if (MRI->subRegLivenessEnabled()) { - SRs.clear(); - for (const LiveInterval::SubRange &SR : LI.subranges()) { - SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end))); - } - } - // Every instruction that kills Reg corresponds to a segment range end // point. for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE; @@ -776,20 +765,18 @@ // are actually never written by %2. After assignment the // flag at the read instruction is invalid. LaneBitmask DefinedLanesMask; - if (!SRs.empty()) { + if (LI.hasSubRanges()) { // Compute a mask of lanes that are defined. DefinedLanesMask = LaneBitmask::getNone(); - for (auto &SRP : SRs) { - const LiveInterval::SubRange &SR = *SRP.first; - LiveRange::const_iterator &I = SRP.second; - if (I == SR.end()) - continue; - I = SR.advanceTo(I, RI->end); - if (I == SR.end() || I->start >= RI->end) - continue; - // I is overlapping RI - DefinedLanesMask |= SR.LaneMask; - } + for (const LiveInterval::SubRange &SR : LI.subranges()) + for (const LiveRange::Segment &Segment : SR.segments) { + if (Segment.start >= RI->end) + break; + if (Segment.end == RI->end) { + DefinedLanesMask |= SR.LaneMask; + break; + } + } } else DefinedLanesMask = LaneBitmask::getAll(); @@ -799,7 +786,9 @@ continue; if (MO.isUse()) { // Reading any undefined lanes? - LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + unsigned SubReg = MO.getSubReg(); + LaneBitmask UseMask = SubReg ? TRI->getSubRegIndexLaneMask(SubReg) + : MRI->getMaxLaneMaskForVReg(Reg); if ((UseMask & ~DefinedLanesMask).any()) goto CancelKill; } else if (MO.getSubReg() == 0) { diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll --- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll @@ -7303,8 +7303,7 @@ ; GCN-NOHSA-SI-NEXT: buffer_store_dword v20, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill ; GCN-NOHSA-SI-NEXT: buffer_store_dword v21, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill ; GCN-NOHSA-SI-NEXT: buffer_store_dword v22, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill -; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1) -; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v21, s0, v3 +; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, s0, v3 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v32, 16, v5 ; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v30, s0, v5 ; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v36, 16, v6 @@ -7345,7 +7344,6 @@ ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v41, v1 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v31, v1 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v33, v1 -; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, v21 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, v23 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v1 ; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v1 diff --git a/llvm/test/CodeGen/AMDGPU/regbank-reassign-wave64.mir b/llvm/test/CodeGen/AMDGPU/regbank-reassign-wave64.mir --- a/llvm/test/CodeGen/AMDGPU/regbank-reassign-wave64.mir +++ b/llvm/test/CodeGen/AMDGPU/regbank-reassign-wave64.mir @@ -22,7 +22,7 @@ # GCN: $vgpr44_vgpr45_vgpr46_vgpr47 = IMPLICIT_DEF # GCN: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr5, $vcc, implicit $exec # GCN: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr4, killed $vcc, implicit $exec -# GCN: $sgpr0_sgpr1 = V_CMP_LT_U64_e64 $vgpr4_vgpr5, $vgpr0_vgpr1, implicit $exec +# GCN: $sgpr0_sgpr1 = V_CMP_LT_U64_e64 killed $vgpr4_vgpr5, killed $vgpr0_vgpr1, implicit $exec --- name: vgpr64_mixed_use tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir --- a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir +++ b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir @@ -367,7 +367,7 @@ # GCN-LABEL: vgpr_lo16_sub{{$}} # GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec -# GCN: renamable $vgpr1_lo16 = COPY renamable $vgpr0_lo16 +# GCN: renamable $vgpr1_lo16 = COPY killed renamable $vgpr0_lo16 --- name: vgpr_lo16_sub tracksRegLiveness: true @@ -404,7 +404,7 @@ # GCN-LABEL: vgpr_hi16_sub{{$}} # GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec -# GCN: renamable $vgpr1_hi16 = COPY renamable $vgpr0_hi16 +# GCN: renamable $vgpr1_hi16 = COPY killed renamable $vgpr0_hi16 --- name: vgpr_hi16_sub tracksRegLiveness: true @@ -441,7 +441,7 @@ # GCN-LABEL: sgpr_lo16_sub{{$}} # GCN: renamable $sgpr0 = S_AND_B32 killed renamable $sgpr14, $sgpr0, implicit-def $scc -# GCN: renamable $sgpr1_lo16 = COPY renamable $sgpr0_lo16 +# GCN: renamable $sgpr1_lo16 = COPY killed renamable $sgpr0_lo16 --- name: sgpr_lo16_sub tracksRegLiveness: true @@ -498,7 +498,7 @@ # Test that bank of subreg is considered during scavenging. # If handled incorrectly an infinite loop occurs. # GCN-LABEL: s0_vs_s15_16_17_sub1{{$}} -# GCN: S_AND_B32 renamable $sgpr13, $sgpr0, +# GCN: S_AND_B32 killed renamable $sgpr13, $sgpr0, --- name: s0_vs_s15_16_17_sub1 tracksRegLiveness: true @@ -531,7 +531,7 @@ # GCN: $vgpr40_vgpr41_vgpr42_vgpr43 = IMPLICIT_DEF # GCN: $vgpr44_vgpr45_vgpr46_vgpr47 = IMPLICIT_DEF # GCN: $vgpr0_vgpr1 = V_ADD_F64_e64 0, $vgpr11_vgpr12, 0, killed $vgpr16_vgpr17, 0, 0, implicit $mode, implicit $exec -# GCN: $vgpr0_vgpr1 = V_ADD_F64_e64 0, $vgpr9_vgpr10, 0, killed $vgpr14_vgpr15, 0, 0, implicit $mode, implicit $exec +# GCN: $vgpr0_vgpr1 = V_ADD_F64_e64 0, killed $vgpr9_vgpr10, 0, killed $vgpr14_vgpr15, 0, 0, implicit $mode, implicit $exec --- name: vgpr_sub_dependence tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll --- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -16,7 +16,7 @@ ; GFX9-FLATSCR: s_mov_b32 [[SOFF1:s[0-9]+]], 4{{$}} ; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SOFF1]] ; 16-byte Folded Spill -; GFX9-FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x{{[0-9a-f]+}}{{$}} +; GFX9-FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x1{{[0-9a-f]+}}{{$}} ; GFX9-FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, [[SOFF2]] ; 16-byte Folded Reload ; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset:{{[0-9]+}} ; 16-byte Folded Spill diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir b/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir --- a/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir @@ -19,7 +19,7 @@ ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK: renamable $sgpr0 = IMPLICIT_DEF ; CHECK: renamable $sgpr1 = IMPLICIT_DEF - ; CHECK: $sgpr104 = S_AND_B32 renamable $sgpr0, renamable $sgpr1, implicit-def $scc + ; CHECK: $sgpr104 = S_AND_B32 killed renamable $sgpr0, renamable $sgpr1, implicit-def $scc ; CHECK: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103 ; CHECK: renamable $sgpr0_sgpr1 = IMPLICIT_DEF ; CHECK: renamable $sgpr0 = IMPLICIT_DEF @@ -27,7 +27,7 @@ ; CHECK: SI_SPILL_S64_SAVE renamable $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store 8 into %stack.0, align 4, addrspace 5) ; CHECK: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103 ; CHECK: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load 8 from %stack.0, align 4, addrspace 5) - ; CHECK: $sgpr105 = S_AND_B32 renamable $sgpr1, renamable $sgpr1, implicit-def $scc + ; CHECK: $sgpr105 = S_AND_B32 killed renamable $sgpr1, renamable $sgpr1, implicit-def $scc ; CHECK: S_NOP 0, implicit $sgpr104, implicit $sgpr105 %0:sreg_64 = COPY $sgpr0_sgpr1 %0.sub0:sreg_64 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/splitkit.mir b/llvm/test/CodeGen/AMDGPU/splitkit.mir --- a/llvm/test/CodeGen/AMDGPU/splitkit.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit.mir @@ -39,11 +39,11 @@ # CHECK: S_NOP 0 # CHECK: S_NOP 0, implicit renamable [[REG0]] # CHECK: S_NOP 0, implicit renamable [[REG1]] -# CHECK: $sgpr0 = COPY renamable [[REG0]] +# CHECK: $sgpr0 = COPY killed renamable [[REG0]] # CHECK: $sgpr2 = COPY renamable [[REG1]] # CHECK: S_NOP # CHECK: S_NOP 0, implicit renamable $sgpr0 -# CHECK: S_NOP 0, implicit renamable $sgpr2 +# CHECK: S_NOP 0, implicit killed renamable $sgpr2 name: func1 tracksRegLiveness: true body: | diff --git a/llvm/test/CodeGen/AMDGPU/subreg_interference.mir b/llvm/test/CodeGen/AMDGPU/subreg_interference.mir --- a/llvm/test/CodeGen/AMDGPU/subreg_interference.mir +++ b/llvm/test/CodeGen/AMDGPU/subreg_interference.mir @@ -16,8 +16,8 @@ # CHECK: S_NOP 0, implicit-def renamable $sgpr3 # CHECK: S_NOP 0, implicit-def renamable $sgpr1 # CHECK: S_NOP 0, implicit-def renamable $sgpr2 -# CHECK: S_NOP 0, implicit renamable $sgpr0, implicit renamable $sgpr3 -# CHECK: S_NOP 0, implicit renamable $sgpr1, implicit renamable $sgpr2 +# CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit renamable $sgpr3 +# CHECK: S_NOP 0, implicit killed renamable $sgpr1, implicit renamable $sgpr2 name: func0 body: | bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir --- a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir +++ b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir @@ -40,24 +40,24 @@ ; CHECK: renamable $sgpr13 = COPY renamable $sgpr5 ; CHECK: renamable $sgpr14 = COPY renamable $sgpr5 ; CHECK: renamable $sgpr15 = COPY renamable $sgpr5 - ; CHECK: renamable $vgpr5_vgpr6 = COPY renamable $sgpr0_sgpr1 + ; CHECK: renamable $vgpr5_vgpr6 = COPY killed renamable $sgpr0_sgpr1 ; CHECK: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1088, 0, 0 :: (dereferenceable load 32, addrspace 6) ; CHECK: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 16, addrspace 6) ; CHECK: renamable $sgpr0 = S_MOV_B32 1200 ; CHECK: renamable $sgpr1 = COPY renamable $sgpr5 ; CHECK: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0, 0 :: (dereferenceable load 32, addrspace 6) ; CHECK: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6) - ; CHECK: KILL renamable $sgpr0, renamable $sgpr1 + ; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1 ; CHECK: renamable $sgpr0 = S_MOV_B32 1264 ; CHECK: renamable $sgpr1 = COPY renamable $sgpr5 ; CHECK: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0, 0 :: (dereferenceable load 32, addrspace 6) ; CHECK: renamable $sgpr88_sgpr89_sgpr90_sgpr91 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6) - ; CHECK: KILL renamable $sgpr0, renamable $sgpr1 + ; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1 ; CHECK: renamable $sgpr0 = S_MOV_B32 1328 ; CHECK: renamable $sgpr1 = COPY renamable $sgpr5 ; CHECK: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0, 0 :: (dereferenceable load 32, addrspace 6) ; CHECK: renamable $sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6) - ; CHECK: KILL renamable $sgpr0, renamable $sgpr1 + ; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1 ; CHECK: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1344, 0, 0 :: (dereferenceable load 32, addrspace 6) ; CHECK: renamable $sgpr0 = S_MOV_B32 1392 ; CHECK: renamable $sgpr1 = COPY renamable $sgpr5 @@ -66,10 +66,10 @@ ; CHECK: renamable $sgpr3 = COPY renamable $sgpr5 ; CHECK: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1472, 0, 0 :: (dereferenceable load 32, addrspace 6) ; CHECK: renamable $sgpr4 = S_MOV_B32 1520 - ; CHECK: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM renamable $sgpr2_sgpr3, 0, 0, 0 :: (load 16, addrspace 6) - ; CHECK: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 16, addrspace 6) - ; CHECK: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6) - ; CHECK: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") + ; CHECK: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 0, 0, 0 :: (load 16, addrspace 6) + ; CHECK: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 16, addrspace 6) + ; CHECK: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6) + ; CHECK: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") ; CHECK: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") ; CHECK: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") ; CHECK: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource") @@ -100,7 +100,7 @@ ; CHECK: renamable $vgpr0 = V_MAX3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr3, 0, 0, implicit $mode, implicit $exec ; CHECK: renamable $sgpr0 = nofpexcept V_CMP_GT_F32_e64 0, 1028443341, 0, killed $vgpr0, 0, implicit $mode, implicit $exec ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 0, 0, 1065353216, killed $sgpr0, implicit $exec - ; CHECK: EXP_DONE 12, killed renamable $vgpr0, renamable $vgpr2, undef renamable $vgpr0, undef renamable $vgpr0, -1, 0, 15, implicit $exec + ; CHECK: EXP_DONE 12, killed renamable $vgpr0, killed renamable $vgpr2, undef renamable $vgpr0, undef renamable $vgpr0, -1, 0, 15, implicit $exec ; CHECK: S_ENDPGM 0 %0:vgpr_32 = COPY $vgpr0 undef %1.sub0:sgpr_64 = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir --- a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir @@ -40,7 +40,7 @@ ; CHECK: $sgpr4 = COPY $sgpr95 ; CHECK: $vgpr0 = COPY renamable $vgpr40 ; CHECK: $vgpr1 = COPY renamable $vgpr41 - ; CHECK: $vgpr2 = COPY renamable $vgpr42 + ; CHECK: $vgpr2 = COPY killed renamable $vgpr42 ; CHECK: $vgpr3 = KILL undef renamable $vgpr3 ; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0 ; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 diff --git a/llvm/test/CodeGen/PowerPC/subreg-killed.mir b/llvm/test/CodeGen/PowerPC/subreg-killed.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/subreg-killed.mir @@ -0,0 +1,40 @@ +# RUN: llc -mcpu=pwr10 -O3 -ppc-track-subreg-liveness -verify-machineinstrs \ +# RUN: -mtriple=powerpc64le-unknown-linux-gnu -run-pass=greedy,virtregrewriter \ +# RUN: -o - %s | FileCheck %s + +# This test case checks that the 'killed' flag is properly added when using +# subregisters. + +# CHECK-LABEL: test +# CHECK: KILL_PAIR killed +# CHECK-NEXT: COPY killed +# CHECK-NEXT: KILL_PAIR killed +# CHECK-NEXT: COPY killed +# CHECK-NEXT: BUILD_UACC killed +# CHECK-NEXT: XXMTACC killed +# CHECK-NEXT: SPILL_ACC killed + +--- +name: test +tracksRegLiveness: true +fixedStack: + - { id: 0, size: 8 } +stack: + - { id: 0, size: 64 } +body: | + bb.0: + liveins: $v2, $v3, $v4, $v5 + undef %4.sub_vsx1:vsrprc_with_sub_64_in_vfrc = COPY $v5 + %4.sub_vsx0:vsrprc_with_sub_64_in_vfrc = COPY $v4 + undef %5.sub_vsx1:vsrprc_with_sub_64_in_vfrc = COPY $v3 + %5.sub_vsx0:vsrprc_with_sub_64_in_vfrc = COPY $v2 + %6:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.0 + %5:vsrprc_with_sub_64_in_vfrc = KILL_PAIR %5 + undef %7.sub_pair0:uaccrc = COPY %5 + %4:vsrprc_with_sub_64_in_vfrc = KILL_PAIR %4 + %7.sub_pair1:uaccrc = COPY %4 + %8:accrc = BUILD_UACC %7 + %8:accrc = XXMTACC %8 + SPILL_ACC %8, 0, %stack.0 + +...