diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
--- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
@@ -230,15 +230,23 @@
       if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
         continue;

-      if (MO.isKill() && MO.readsReg()) {
+      if (MO.readsReg()) {
         LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
         const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
         // Make sure two conditions are met:
         // a.) the value is defined before/in the IF block
         // b.) should be defined in the same loop-level.
         if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
-            Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
-          KillsInElse.insert(MOReg);
+            Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
+          // Check if the register is live into the endif block. If not,
+          // consider it killed in the else region.
+          if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
+            KillsInElse.insert(MOReg);
+          } else {
+            LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
+                              << " as Live in Endif\n");
+          }
+        }
       }
     }
   }
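For reference, the new guard only records a vector register in KillsInElse when liveness says the value does not survive into the endif block. The standalone sketch below models that decision; ToyVarInfo and the integer block/loop ids are simplified stand-ins for LLVM's LiveVariables::VarInfo and MachineBasicBlock numbering, not the actual API.

#include <cstdio>
#include <set>

// Simplified stand-in for LLVM's LiveVariables::VarInfo: AliveBlocks
// holds the blocks the value is live through.
struct ToyVarInfo {
  std::set<int> AliveBlocks;
  bool isLiveIn(int Block) const { return AliveBlocks.count(Block) != 0; }
};

// Mirrors the patched condition: a register counts as killed in the else
// region only if (a) it is defined before/in the if block, (b) at the same
// loop level, and (c) -- the fix -- it is not live into the endif block.
bool killedInElse(const ToyVarInfo &VI, int IfBlock, int DefBlock,
                  int EndifBlock, int DefLoop, int IfLoop) {
  bool DefinedBeforeIf =
      VI.AliveBlocks.count(IfBlock) != 0 || DefBlock == IfBlock;
  if (!DefinedBeforeIf || DefLoop != IfLoop)
    return false;
  return !VI.isLiveIn(EndifBlock); // live into endif => not a kill
}

int main() {
  // Value defined in block 0 (the if header), live through blocks 0-1,
  // but not into block 3 (the endif): it counts as killed in the else.
  ToyVarInfo VI;
  VI.AliveBlocks = {0, 1};
  std::printf("killed in else: %s\n",
              killedInElse(VI, /*IfBlock=*/0, /*DefBlock=*/0,
                           /*EndifBlock=*/3, /*DefLoop=*/0, /*IfLoop=*/0)
                  ? "yes" : "no"); // prints "yes"
  return 0;
}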
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -158,8 +158,8 @@
 ; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
 ; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v2
 ; CHECK-NEXT:    v_subb_u32_e32 v5, vcc, v1, v2, vcc
-; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:    ; implicit-def: $vgpr2
+; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:  BB0_2: ; %Flow
 ; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
@@ -844,8 +844,8 @@
 ; CGP-NEXT:    v_xor_b32_e32 v1, v4, v5
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
 ; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr8
 ; CGP-NEXT:    ; implicit-def: $vgpr4
+; CGP-NEXT:    ; implicit-def: $vgpr8
 ; CGP-NEXT:  BB2_2: ; %Flow2
 ; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
@@ -1023,8 +1023,8 @@
 ; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v2, v5
 ; CGP-NEXT:    v_xor_b32_e32 v3, v3, v5
 ; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v3, v5, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:    ; implicit-def: $vgpr6
+; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:  BB2_6: ; %Flow
 ; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
@@ -2661,8 +2661,8 @@
 ; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v0, v3
 ; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
 ; CHECK-NEXT:    v_subb_u32_e32 v3, vcc, v1, v3, vcc
-; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:  BB7_2: ; %Flow
 ; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
@@ -3149,8 +3149,8 @@
 ; CGP-NEXT:    v_xor_b32_e32 v1, v4, v5
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v5
 ; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr5
 ; CGP-NEXT:    ; implicit-def: $vgpr10_vgpr11
+; CGP-NEXT:    ; implicit-def: $vgpr5
 ; CGP-NEXT:  BB8_2: ; %Flow2
 ; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
@@ -3328,8 +3328,8 @@
 ; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v2, v5
 ; CGP-NEXT:    v_xor_b32_e32 v3, v3, v5
 ; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v3, v5, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:    ; implicit-def: $vgpr8_vgpr9
+; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:  BB8_6: ; %Flow
 ; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -156,8 +156,8 @@
 ; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v6
 ; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CHECK-NEXT:    v_subb_u32_e32 v5, vcc, v1, v6, vcc
-; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:    ; implicit-def: $vgpr2
+; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:  BB0_2: ; %Flow
 ; CHECK-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CHECK-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -830,8 +830,8 @@
 ; CGP-NEXT:    v_xor_b32_e32 v4, v0, v10
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v1, v10
 ; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v4, v10, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr8
 ; CGP-NEXT:    ; implicit-def: $vgpr4
+; CGP-NEXT:    ; implicit-def: $vgpr8
 ; CGP-NEXT:  BB2_2: ; %Flow2
 ; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -1005,8 +1005,8 @@
 ; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v2, v8
 ; CGP-NEXT:    v_xor_b32_e32 v3, v3, v8
 ; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v3, v8, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:    ; implicit-def: $vgpr6
+; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:  BB2_6: ; %Flow
 ; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -2623,8 +2623,8 @@
 ; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v0, v6
 ; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; CHECK-NEXT:    v_subb_u32_e32 v3, vcc, v1, v6, vcc
-; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:  BB7_2: ; %Flow
 ; CHECK-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CHECK-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -3103,8 +3103,8 @@
 ; CGP-NEXT:    v_xor_b32_e32 v4, v0, v10
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v1, v10
 ; CGP-NEXT:    v_subb_u32_e32 v1, vcc, v4, v10, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr5
 ; CGP-NEXT:    ; implicit-def: $vgpr10_vgpr11
+; CGP-NEXT:    ; implicit-def: $vgpr5
 ; CGP-NEXT:  BB8_2: ; %Flow2
 ; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -3278,8 +3278,8 @@
 ; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v2, v8
 ; CGP-NEXT:    v_xor_b32_e32 v3, v3, v8
 ; CGP-NEXT:    v_subb_u32_e32 v5, vcc, v3, v8, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:    ; implicit-def: $vgpr8_vgpr9
+; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:  BB8_6: ; %Flow
 ; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -143,8 +143,8 @@
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
-; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:    ; implicit-def: $vgpr2
+; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:  BB0_2: ; %Flow
 ; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
@@ -765,8 +765,8 @@
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr8
 ; CGP-NEXT:    ; implicit-def: $vgpr4
+; CGP-NEXT:    ; implicit-def: $vgpr8
 ; CGP-NEXT:  BB2_2: ; %Flow2
 ; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
@@ -929,8 +929,8 @@
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
 ; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:    ; implicit-def: $vgpr6
+; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:  BB2_6: ; %Flow
 ; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
@@ -2428,8 +2428,8 @@
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
-; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:  BB7_2: ; %Flow
 ; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CHECK-NEXT:    s_xor_b64 exec, exec, s[6:7]
@@ -2871,8 +2871,8 @@
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr5
 ; CGP-NEXT:    ; implicit-def: $vgpr10_vgpr11
+; CGP-NEXT:    ; implicit-def: $vgpr5
 ; CGP-NEXT:  BB8_2: ; %Flow2
 ; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
@@ -3035,8 +3035,8 @@
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
 ; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v2, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:    ; implicit-def: $vgpr8_vgpr9
+; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:  BB8_6: ; %Flow
 ; CGP-NEXT:    s_or_saveexec_b64 s[6:7], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -142,8 +142,8 @@
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, v0, v2, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v5, v5, v1, vcc
-; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:    ; implicit-def: $vgpr2
+; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:  BB0_2: ; %Flow
 ; CHECK-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CHECK-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -756,8 +756,8 @@
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v9
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v8, v5, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr8
 ; CGP-NEXT:    ; implicit-def: $vgpr4
+; CGP-NEXT:    ; implicit-def: $vgpr8
 ; CGP-NEXT:  BB2_2: ; %Flow2
 ; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -917,8 +917,8 @@
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; CGP-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:    ; implicit-def: $vgpr6
+; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:  BB2_6: ; %Flow
 ; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -1787,8 +1787,8 @@
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
 ; CHECK-NEXT:    v_cndmask_b32_e32 v2, v0, v4, vcc
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
-; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:  BB7_2: ; %Flow
 ; CHECK-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CHECK-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -2225,8 +2225,8 @@
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, v1, v6, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, v4, v7, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr5
 ; CGP-NEXT:    ; implicit-def: $vgpr10_vgpr11
+; CGP-NEXT:    ; implicit-def: $vgpr5
 ; CGP-NEXT:  BB8_2: ; %Flow2
 ; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -2386,8 +2386,8 @@
 ; CGP-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; CGP-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
 ; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v3, vcc
-; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:    ; implicit-def: $vgpr8_vgpr9
+; CGP-NEXT:    ; implicit-def: $vgpr2
 ; CGP-NEXT:  BB8_6: ; %Flow
 ; CGP-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; CGP-NEXT:    s_xor_b64 exec, exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/bypass-div.ll b/llvm/test/CodeGen/AMDGPU/bypass-div.ll
--- a/llvm/test/CodeGen/AMDGPU/bypass-div.ll
+++ b/llvm/test/CodeGen/AMDGPU/bypass-div.ll
@@ -136,8 +136,8 @@
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v2
 ; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v1, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v0, v2, vcc
-; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX9-NEXT:  BB0_2: ; %Flow
 ; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; GFX9-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -289,8 +289,8 @@
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, v10, v8, s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v0, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
-; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX9-NEXT:  BB1_2: ; %Flow
 ; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], s[6:7]
 ; GFX9-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -455,8 +455,8 @@
 ; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v0, v6
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v1, v6, vcc
-; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX9-NEXT:  BB2_2: ; %Flow
 ; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], s[8:9]
 ; GFX9-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -605,8 +605,8 @@
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v1, v4, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, v6, v9, s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v0, v1, vcc
-; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX9-NEXT:  BB3_2: ; %Flow
 ; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], s[8:9]
 ; GFX9-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -908,8 +908,8 @@
 ; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v7
 ; GFX9-NEXT:    v_sub_co_u32_e32 v6, vcc, v0, v7
 ; GFX9-NEXT:    v_subb_co_u32_e32 v7, vcc, v1, v7, vcc
-; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX9-NEXT:  BB8_2: ; %Flow
 ; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], s[10:11]
 ; GFX9-NEXT:    s_xor_b64 exec, exec, s[4:5]
@@ -1077,8 +1077,8 @@
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v0, v1, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v10, vcc
-; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX9-NEXT:  BB9_2: ; %Flow
 ; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], s[8:9]
 ; GFX9-NEXT:    s_xor_b64 exec, exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -205,27 +205,28 @@
   ; SI:   bb.1.Flow:
   ; SI:   successors: %bb.2(0x40000000), %bb.8(0x40000000)
   ; SI:   [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %29:vgpr_32, %bb.0, %4, %bb.7
-  ; SI:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %45:vgpr_32, %bb.7
-  ; SI:   [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %47:vgpr_32, %bb.7
+  ; SI:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY4]], %bb.0, undef %45:vgpr_32, %bb.7
+  ; SI:   [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %47:vgpr_32, %bb.7
+  ; SI:   [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %49:vgpr_32, %bb.7
   ; SI:   [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; SI:   S_BRANCH %bb.2
   ; SI:   bb.2.if:
   ; SI:   successors: %bb.3(0x80000000)
-  ; SI:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PHI1]], %subreg.sub0, killed [[PHI2]], %subreg.sub1
+  ; SI:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PHI2]], %subreg.sub0, killed [[PHI3]], %subreg.sub1
   ; SI:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
   ; SI:   bb.3:
   ; SI:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
-  ; SI:   [[PHI3:%[0-9]+]]:vreg_64 = PHI undef %49:vreg_64, %bb.3, [[REG_SEQUENCE]], %bb.2
-  ; SI:   [[PHI4:%[0-9]+]]:vgpr_32 = PHI undef %51:vgpr_32, %bb.3, [[COPY4]], %bb.2
-  ; SI:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI3]].sub0, implicit $exec
-  ; SI:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI3]].sub1, implicit $exec
+  ; SI:   [[PHI4:%[0-9]+]]:vreg_64 = PHI undef %51:vreg_64, %bb.3, [[REG_SEQUENCE]], %bb.2
+  ; SI:   [[PHI5:%[0-9]+]]:vgpr_32 = PHI undef %53:vgpr_32, %bb.3, [[PHI1]], %bb.2
+  ; SI:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI4]].sub0, implicit $exec
+  ; SI:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI4]].sub1, implicit $exec
   ; SI:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_1]], %subreg.sub1
-  ; SI:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], killed [[PHI3]], implicit $exec
+  ; SI:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], killed [[PHI4]], implicit $exec
   ; SI:   [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U64_e64_]], implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; SI:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
   ; SI:   [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
   ; SI:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]]
-  ; SI:   $vgpr0 = COPY killed [[PHI4]]
+  ; SI:   $vgpr0 = COPY killed [[PHI5]]
   ; SI:   dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_highregs, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
   ; SI:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
   ; SI:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
@@ -242,16 +243,17 @@
   ; SI:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
   ; SI:   bb.6:
   ; SI:   successors: %bb.6(0x40000000), %bb.7(0x40000000)
-  ; SI:   [[PHI5:%[0-9]+]]:vreg_64 = PHI undef %53:vreg_64, %bb.6, [[REG_SEQUENCE2]], %bb.5
-  ; SI:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI5]].sub0, implicit $exec
-  ; SI:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI5]].sub1, implicit $exec
+  ; SI:   [[PHI6:%[0-9]+]]:vreg_64 = PHI undef %55:vreg_64, %bb.6, [[REG_SEQUENCE2]], %bb.5
+  ; SI:   [[PHI7:%[0-9]+]]:vgpr_32 = PHI undef %57:vgpr_32, %bb.6, [[COPY4]], %bb.5
+  ; SI:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI6]].sub0, implicit $exec
+  ; SI:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI6]].sub1, implicit $exec
   ; SI:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_2]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_3]], %subreg.sub1
-  ; SI:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], killed [[PHI5]], implicit $exec
+  ; SI:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], killed [[PHI6]], implicit $exec
   ; SI:   [[S_AND_SAVEEXEC_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U64_e64_1]], implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; SI:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
   ; SI:   [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
   ; SI:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]]
-  ; SI:   $vgpr0 = COPY [[COPY4]]
+  ; SI:   $vgpr0 = COPY killed [[PHI7]]
   ; SI:   dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_highregs, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
   ; SI:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
   ; SI:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
@@ -263,9 +265,9 @@
   ; SI:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed [[COPY10]]
   ; SI:   S_BRANCH %bb.1
   ; SI:   bb.8.end:
-  ; SI:   [[PHI6:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[COPY8]], %bb.4
+  ; SI:   [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[COPY8]], %bb.4
   ; SI:   SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; SI:   $vgpr0 = COPY killed [[PHI6]]
+  ; SI:   $vgpr0 = COPY killed [[PHI8]]
   ; SI:   SI_RETURN_TO_EPILOG killed $vgpr0
 main_body:
   %cc = icmp sgt i32 %z, 5
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll
@@ -157,15 +157,16 @@
 define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, float(float)* %extern_func, float(float)* %extern_func2) #0 {
 ; SI-LABEL: loop:
 ; SI:       ; %bb.0: ; %main_body
+; SI-NEXT:    v_mov_b32_e32 v6, v0
 ; SI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
 ; SI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
 ; SI-NEXT:    s_mov_b32 s38, -1
-; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v0
-; SI-NEXT:    v_mov_b32_e32 v40, v1
+; SI-NEXT:    v_mov_b32_e32 v0, v1
+; SI-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 6, v6
 ; SI-NEXT:    s_mov_b32 s39, 0x31c16000
 ; SI-NEXT:    s_add_u32 s36, s36, s1
 ; SI-NEXT:    s_addc_u32 s37, s37, 0
-; SI-NEXT:    ; implicit-def: $vgpr0
+; SI-NEXT:    ; implicit-def: $vgpr1
 ; SI-NEXT:    s_mov_b32 s32, 0
 ; SI-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; SI-NEXT:    s_xor_b32 s33, exec_lo, s0
@@ -177,15 +178,17 @@
 ; SI-NEXT:    v_readfirstlane_b32 s5, v5
 ; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
 ; SI-NEXT:    s_and_saveexec_b32 s35, vcc_lo
-; SI-NEXT:    v_mov_b32_e32 v0, v40
 ; SI-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; SI-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SI-NEXT:    v_mov_b32_e32 v1, v0
 ; SI-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; SI-NEXT:    ; implicit-def: $vgpr0
 ; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s35
 ; SI-NEXT:    s_cbranch_execnz BB3_2
 ; SI-NEXT:  ; %bb.3:
 ; SI-NEXT:    s_mov_b32 exec_lo, s34
+; SI-NEXT:    ; implicit-def: $vgpr0
 ; SI-NEXT:    ; implicit-def: $vgpr2
 ; SI-NEXT:  BB3_4: ; %Flow
 ; SI-NEXT:    s_or_saveexec_b32 s33, s33
@@ -198,18 +201,19 @@
 ; SI-NEXT:    v_readfirstlane_b32 s5, v3
 ; SI-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3]
 ; SI-NEXT:    s_and_saveexec_b32 s35, vcc_lo
-; SI-NEXT:    v_mov_b32_e32 v0, v40
 ; SI-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; SI-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; SI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; SI-NEXT:    v_mov_b32_e32 v1, v0
 ; SI-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; SI-NEXT:    ; implicit-def: $vgpr40
+; SI-NEXT:    ; implicit-def: $vgpr0
 ; SI-NEXT:    s_xor_b32 exec_lo, exec_lo, s35
 ; SI-NEXT:    s_cbranch_execnz BB3_6
 ; SI-NEXT:  ; %bb.7:
 ; SI-NEXT:    s_mov_b32 exec_lo, s34
 ; SI-NEXT:  BB3_8: ; %end
 ; SI-NEXT:    s_or_b32 exec_lo, exec_lo, s33
+; SI-NEXT:    v_mov_b32_e32 v0, v1
 ; SI-NEXT:    ; return to shader part epilog
 main_body:
   %cc = icmp sgt i32 %z, 5