diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -293,6 +293,10 @@ } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addUsedIfAvailable(); + AU.addUsedIfAvailable(); + AU.addUsedIfAvailable(); + AU.addUsedIfAvailable(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -57,7 +57,10 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.setPreservesAll(); + // FIXME: AU.setPreservesAll(); + AU.addPreservedID(MachineDominatorsID); + AU.addPreserved(); + AU.addPreservedID(LiveVariablesID); MachineFunctionPass::getAnalysisUsage(AU); } }; diff --git a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp --- a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -152,7 +152,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addPreserved(); + // FIXME: AU.addPreserved(); AU.addPreserved(); AU.addRequired(); AU.addPreserved(); diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp --- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -217,7 +217,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addPreserved(); + // FIXME: AU.addPreserved(); AU.addRequired(); AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll @@ -119,41 +119,43 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh_intersect_ray_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v15, v0 -; GFX1030-NEXT: v_mov_b32_e32 v16, v1 -; GFX1030-NEXT: v_mov_b32_e32 v17, v2 -; GFX1030-NEXT: v_mov_b32_e32 v18, v3 -; GFX1030-NEXT: v_mov_b32_e32 v19, v4 -; GFX1030-NEXT: v_mov_b32_e32 v20, v5 -; GFX1030-NEXT: v_mov_b32_e32 v21, v6 -; GFX1030-NEXT: v_mov_b32_e32 v22, v7 -; GFX1030-NEXT: v_mov_b32_e32 v23, v8 -; GFX1030-NEXT: v_mov_b32_e32 v24, v9 -; GFX1030-NEXT: v_mov_b32_e32 v25, v10 +; GFX1030-NEXT: v_mov_b32_e32 v23, v0 +; GFX1030-NEXT: v_mov_b32_e32 v24, v1 +; GFX1030-NEXT: v_mov_b32_e32 v25, v2 +; GFX1030-NEXT: v_mov_b32_e32 v26, v3 +; GFX1030-NEXT: v_mov_b32_e32 v27, v4 +; GFX1030-NEXT: v_mov_b32_e32 v28, v5 +; GFX1030-NEXT: v_mov_b32_e32 v29, v6 +; GFX1030-NEXT: v_mov_b32_e32 v30, v7 +; GFX1030-NEXT: v_mov_b32_e32 v31, v8 +; GFX1030-NEXT: v_mov_b32_e32 v32, v9 +; GFX1030-NEXT: v_mov_b32_e32 v33, v10 +; GFX1030-NEXT: v_mov_b32_e32 v21, v13 +; GFX1030-NEXT: v_mov_b32_e32 v22, v14 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo ; GFX1030-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX1030-NEXT: v_readfirstlane_b32 s4, v11 ; GFX1030-NEXT: v_readfirstlane_b32 s5, v12 -; GFX1030-NEXT: v_readfirstlane_b32 s6, v13 -; GFX1030-NEXT: v_readfirstlane_b32 s7, v14 +; GFX1030-NEXT: v_readfirstlane_b32 s6, v21 +; GFX1030-NEXT: v_readfirstlane_b32 s7, v22 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12] -; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14] +; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[21:22] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[15:30], s[4:7] +; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[23:38], s[4:7] ; GFX1030-NEXT: ; implicit-def: $vgpr11 -; GFX1030-NEXT: ; implicit-def: $vgpr15 -; GFX1030-NEXT: ; implicit-def: $vgpr16 -; GFX1030-NEXT: ; implicit-def: $vgpr17 -; GFX1030-NEXT: ; implicit-def: $vgpr18 -; GFX1030-NEXT: ; implicit-def: $vgpr19 -; GFX1030-NEXT: ; implicit-def: $vgpr20 -; GFX1030-NEXT: ; implicit-def: $vgpr21 -; GFX1030-NEXT: ; implicit-def: $vgpr22 ; GFX1030-NEXT: ; implicit-def: $vgpr23 ; GFX1030-NEXT: ; implicit-def: $vgpr24 ; GFX1030-NEXT: ; implicit-def: $vgpr25 -; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14 +; GFX1030-NEXT: ; implicit-def: $vgpr26 +; GFX1030-NEXT: ; implicit-def: $vgpr27 +; GFX1030-NEXT: ; implicit-def: $vgpr28 +; GFX1030-NEXT: ; implicit-def: $vgpr29 +; GFX1030-NEXT: ; implicit-def: $vgpr30 +; GFX1030-NEXT: ; implicit-def: $vgpr31 +; GFX1030-NEXT: ; implicit-def: $vgpr32 +; GFX1030-NEXT: ; implicit-def: $vgpr33 +; GFX1030-NEXT: ; implicit-def: $vgpr19_vgpr20_vgpr21_vgpr22 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB6_1 ; GFX1030-NEXT: ; %bb.2: @@ -165,32 +167,32 @@ ; GFX1013: ; %bb.0: ; GFX1013-NEXT: v_mov_b32_e32 v16, v11 ; GFX1013-NEXT: v_mov_b32_e32 v17, v12 -; GFX1013-NEXT: v_mov_b32_e32 v18, v13 -; GFX1013-NEXT: v_mov_b32_e32 v19, v14 +; GFX1013-NEXT: v_mov_b32_e32 v19, v13 +; GFX1013-NEXT: v_mov_b32_e32 v20, v14 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo ; GFX1013-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GFX1013-NEXT: v_readfirstlane_b32 s4, v16 ; GFX1013-NEXT: v_readfirstlane_b32 s5, v17 -; GFX1013-NEXT: v_readfirstlane_b32 s6, v18 -; GFX1013-NEXT: v_readfirstlane_b32 s7, v19 +; GFX1013-NEXT: v_readfirstlane_b32 s6, v19 +; GFX1013-NEXT: v_readfirstlane_b32 s7, v20 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] -; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19] +; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[19:20] ; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1013-NEXT: image_bvh_intersect_ray v[20:23], v[0:15], s[4:7] +; GFX1013-NEXT: image_bvh_intersect_ray v[21:24], v[0:15], s[4:7] ; GFX1013-NEXT: ; implicit-def: $vgpr16 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19 +; GFX1013-NEXT: ; implicit-def: $vgpr17_vgpr18_vgpr19_vgpr20 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1013-NEXT: s_cbranch_execnz .LBB6_1 ; GFX1013-NEXT: ; %bb.2: ; GFX1013-NEXT: s_mov_b32 exec_lo, s1 ; GFX1013-NEXT: s_waitcnt vmcnt(0) -; GFX1013-NEXT: v_mov_b32_e32 v0, v20 -; GFX1013-NEXT: v_mov_b32_e32 v1, v21 -; GFX1013-NEXT: v_mov_b32_e32 v2, v22 -; GFX1013-NEXT: v_mov_b32_e32 v3, v23 +; GFX1013-NEXT: v_mov_b32_e32 v0, v21 +; GFX1013-NEXT: v_mov_b32_e32 v1, v22 +; GFX1013-NEXT: v_mov_b32_e32 v2, v23 +; GFX1013-NEXT: v_mov_b32_e32 v3, v24 ; GFX1013-NEXT: ; return to shader part epilog %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) %r = bitcast <4 x i32> %v to <4 x float> @@ -200,40 +202,42 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh_intersect_ray_a16_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v13, v0 -; GFX1030-NEXT: v_mov_b32_e32 v14, v1 +; GFX1030-NEXT: v_mov_b32_e32 v20, v0 +; GFX1030-NEXT: v_mov_b32_e32 v21, v1 ; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v5 ; GFX1030-NEXT: v_and_b32_e32 v1, 0xffff, v7 -; GFX1030-NEXT: v_mov_b32_e32 v15, v2 +; GFX1030-NEXT: v_mov_b32_e32 v22, v2 ; GFX1030-NEXT: v_and_b32_e32 v2, 0xffff, v8 -; GFX1030-NEXT: v_mov_b32_e32 v16, v3 +; GFX1030-NEXT: v_mov_b32_e32 v23, v3 ; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mov_b32_e32 v17, v4 -; GFX1030-NEXT: v_alignbit_b32 v20, v2, v7, 16 +; GFX1030-NEXT: v_mov_b32_e32 v24, v4 +; GFX1030-NEXT: v_mov_b32_e32 v18, v11 +; GFX1030-NEXT: v_mov_b32_e32 v19, v12 +; GFX1030-NEXT: v_and_or_b32 v25, v5, 0xffff, v0 +; GFX1030-NEXT: v_and_or_b32 v26, v6, 0xffff, v1 +; GFX1030-NEXT: v_alignbit_b32 v27, v2, v7, 16 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo -; GFX1030-NEXT: v_and_or_b32 v18, v5, 0xffff, v0 -; GFX1030-NEXT: v_and_or_b32 v19, v6, 0xffff, v1 ; GFX1030-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 ; GFX1030-NEXT: v_readfirstlane_b32 s4, v9 ; GFX1030-NEXT: v_readfirstlane_b32 s5, v10 -; GFX1030-NEXT: v_readfirstlane_b32 s6, v11 -; GFX1030-NEXT: v_readfirstlane_b32 s7, v12 +; GFX1030-NEXT: v_readfirstlane_b32 s6, v18 +; GFX1030-NEXT: v_readfirstlane_b32 s7, v19 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] +; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[13:20], s[4:7] a16 +; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[20:27], s[4:7] a16 ; GFX1030-NEXT: ; implicit-def: $vgpr9 -; GFX1030-NEXT: ; implicit-def: $vgpr13 -; GFX1030-NEXT: ; implicit-def: $vgpr14 -; GFX1030-NEXT: ; implicit-def: $vgpr15 -; GFX1030-NEXT: ; implicit-def: $vgpr16 -; GFX1030-NEXT: ; implicit-def: $vgpr17 -; GFX1030-NEXT: ; implicit-def: $vgpr18 -; GFX1030-NEXT: ; implicit-def: $vgpr19 ; GFX1030-NEXT: ; implicit-def: $vgpr20 -; GFX1030-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12 +; GFX1030-NEXT: ; implicit-def: $vgpr21 +; GFX1030-NEXT: ; implicit-def: $vgpr22 +; GFX1030-NEXT: ; implicit-def: $vgpr23 +; GFX1030-NEXT: ; implicit-def: $vgpr24 +; GFX1030-NEXT: ; implicit-def: $vgpr25 +; GFX1030-NEXT: ; implicit-def: $vgpr26 +; GFX1030-NEXT: ; implicit-def: $vgpr27 +; GFX1030-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB7_1 ; GFX1030-NEXT: ; %bb.2: @@ -243,38 +247,40 @@ ; ; GFX1013-LABEL: image_bvh_intersect_ray_a16_vgpr_descr: ; GFX1013: ; %bb.0: -; GFX1013-NEXT: v_lshrrev_b32_e32 v13, 16, v5 -; GFX1013-NEXT: v_and_b32_e32 v14, 0xffff, v7 +; GFX1013-NEXT: v_mov_b32_e32 v13, v11 +; GFX1013-NEXT: v_mov_b32_e32 v14, v12 +; GFX1013-NEXT: v_lshrrev_b32_e32 v11, 16, v5 +; GFX1013-NEXT: v_and_b32_e32 v12, 0xffff, v7 ; GFX1013-NEXT: v_and_b32_e32 v8, 0xffff, v8 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo -; GFX1013-NEXT: v_lshlrev_b32_e32 v13, 16, v13 -; GFX1013-NEXT: v_lshlrev_b32_e32 v14, 16, v14 +; GFX1013-NEXT: v_lshlrev_b32_e32 v11, 16, v11 +; GFX1013-NEXT: v_lshlrev_b32_e32 v12, 16, v12 ; GFX1013-NEXT: v_alignbit_b32 v7, v8, v7, 16 -; GFX1013-NEXT: v_and_or_b32 v5, v5, 0xffff, v13 -; GFX1013-NEXT: v_and_or_b32 v6, v6, 0xffff, v14 +; GFX1013-NEXT: v_and_or_b32 v5, v5, 0xffff, v11 +; GFX1013-NEXT: v_and_or_b32 v6, v6, 0xffff, v12 ; GFX1013-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 ; GFX1013-NEXT: v_readfirstlane_b32 s4, v9 ; GFX1013-NEXT: v_readfirstlane_b32 s5, v10 -; GFX1013-NEXT: v_readfirstlane_b32 s6, v11 -; GFX1013-NEXT: v_readfirstlane_b32 s7, v12 +; GFX1013-NEXT: v_readfirstlane_b32 s6, v13 +; GFX1013-NEXT: v_readfirstlane_b32 s7, v14 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12] +; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14] ; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1013-NEXT: image_bvh_intersect_ray v[13:16], v[0:7], s[4:7] a16 +; GFX1013-NEXT: image_bvh_intersect_ray v[15:18], v[0:7], s[4:7] a16 ; GFX1013-NEXT: ; implicit-def: $vgpr9 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 -; GFX1013-NEXT: ; implicit-def: $vgpr9_vgpr10_vgpr11_vgpr12 +; GFX1013-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1013-NEXT: s_cbranch_execnz .LBB7_1 ; GFX1013-NEXT: ; %bb.2: ; GFX1013-NEXT: s_mov_b32 exec_lo, s1 ; GFX1013-NEXT: s_waitcnt vmcnt(0) -; GFX1013-NEXT: v_mov_b32_e32 v0, v13 -; GFX1013-NEXT: v_mov_b32_e32 v1, v14 -; GFX1013-NEXT: v_mov_b32_e32 v2, v15 -; GFX1013-NEXT: v_mov_b32_e32 v3, v16 +; GFX1013-NEXT: v_mov_b32_e32 v0, v15 +; GFX1013-NEXT: v_mov_b32_e32 v1, v16 +; GFX1013-NEXT: v_mov_b32_e32 v2, v17 +; GFX1013-NEXT: v_mov_b32_e32 v3, v18 ; GFX1013-NEXT: ; return to shader part epilog %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) %r = bitcast <4 x i32> %v to <4 x float> @@ -284,43 +290,45 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh64_intersect_ray_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v16, v0 -; GFX1030-NEXT: v_mov_b32_e32 v17, v1 -; GFX1030-NEXT: v_mov_b32_e32 v18, v2 -; GFX1030-NEXT: v_mov_b32_e32 v19, v3 -; GFX1030-NEXT: v_mov_b32_e32 v20, v4 -; GFX1030-NEXT: v_mov_b32_e32 v21, v5 -; GFX1030-NEXT: v_mov_b32_e32 v22, v6 -; GFX1030-NEXT: v_mov_b32_e32 v23, v7 -; GFX1030-NEXT: v_mov_b32_e32 v24, v8 -; GFX1030-NEXT: v_mov_b32_e32 v25, v9 -; GFX1030-NEXT: v_mov_b32_e32 v26, v10 -; GFX1030-NEXT: v_mov_b32_e32 v27, v11 +; GFX1030-NEXT: v_mov_b32_e32 v24, v0 +; GFX1030-NEXT: v_mov_b32_e32 v25, v1 +; GFX1030-NEXT: v_mov_b32_e32 v26, v2 +; GFX1030-NEXT: v_mov_b32_e32 v27, v3 +; GFX1030-NEXT: v_mov_b32_e32 v28, v4 +; GFX1030-NEXT: v_mov_b32_e32 v29, v5 +; GFX1030-NEXT: v_mov_b32_e32 v30, v6 +; GFX1030-NEXT: v_mov_b32_e32 v31, v7 +; GFX1030-NEXT: v_mov_b32_e32 v32, v8 +; GFX1030-NEXT: v_mov_b32_e32 v33, v9 +; GFX1030-NEXT: v_mov_b32_e32 v34, v10 +; GFX1030-NEXT: v_mov_b32_e32 v35, v11 +; GFX1030-NEXT: v_mov_b32_e32 v22, v14 +; GFX1030-NEXT: v_mov_b32_e32 v23, v15 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo ; GFX1030-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1030-NEXT: v_readfirstlane_b32 s4, v12 ; GFX1030-NEXT: v_readfirstlane_b32 s5, v13 -; GFX1030-NEXT: v_readfirstlane_b32 s6, v14 -; GFX1030-NEXT: v_readfirstlane_b32 s7, v15 +; GFX1030-NEXT: v_readfirstlane_b32 s6, v22 +; GFX1030-NEXT: v_readfirstlane_b32 s7, v23 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[12:13] -; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15] +; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[22:23] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[16:31], s[4:7] +; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[24:39], s[4:7] ; GFX1030-NEXT: ; implicit-def: $vgpr12 -; GFX1030-NEXT: ; implicit-def: $vgpr16 -; GFX1030-NEXT: ; implicit-def: $vgpr17 -; GFX1030-NEXT: ; implicit-def: $vgpr18 -; GFX1030-NEXT: ; implicit-def: $vgpr19 -; GFX1030-NEXT: ; implicit-def: $vgpr20 -; GFX1030-NEXT: ; implicit-def: $vgpr21 -; GFX1030-NEXT: ; implicit-def: $vgpr22 -; GFX1030-NEXT: ; implicit-def: $vgpr23 ; GFX1030-NEXT: ; implicit-def: $vgpr24 ; GFX1030-NEXT: ; implicit-def: $vgpr25 ; GFX1030-NEXT: ; implicit-def: $vgpr26 ; GFX1030-NEXT: ; implicit-def: $vgpr27 -; GFX1030-NEXT: ; implicit-def: $vgpr12_vgpr13_vgpr14_vgpr15 +; GFX1030-NEXT: ; implicit-def: $vgpr28 +; GFX1030-NEXT: ; implicit-def: $vgpr29 +; GFX1030-NEXT: ; implicit-def: $vgpr30 +; GFX1030-NEXT: ; implicit-def: $vgpr31 +; GFX1030-NEXT: ; implicit-def: $vgpr32 +; GFX1030-NEXT: ; implicit-def: $vgpr33 +; GFX1030-NEXT: ; implicit-def: $vgpr34 +; GFX1030-NEXT: ; implicit-def: $vgpr35 +; GFX1030-NEXT: ; implicit-def: $vgpr20_vgpr21_vgpr22_vgpr23 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB8_1 ; GFX1030-NEXT: ; %bb.2: @@ -332,32 +340,32 @@ ; GFX1013: ; %bb.0: ; GFX1013-NEXT: v_mov_b32_e32 v16, v12 ; GFX1013-NEXT: v_mov_b32_e32 v17, v13 -; GFX1013-NEXT: v_mov_b32_e32 v18, v14 -; GFX1013-NEXT: v_mov_b32_e32 v19, v15 +; GFX1013-NEXT: v_mov_b32_e32 v19, v14 +; GFX1013-NEXT: v_mov_b32_e32 v20, v15 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo ; GFX1013-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1013-NEXT: v_readfirstlane_b32 s4, v16 ; GFX1013-NEXT: v_readfirstlane_b32 s5, v17 -; GFX1013-NEXT: v_readfirstlane_b32 s6, v18 -; GFX1013-NEXT: v_readfirstlane_b32 s7, v19 +; GFX1013-NEXT: v_readfirstlane_b32 s6, v19 +; GFX1013-NEXT: v_readfirstlane_b32 s7, v20 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] -; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19] +; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[19:20] ; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7] +; GFX1013-NEXT: image_bvh64_intersect_ray v[21:24], v[0:15], s[4:7] ; GFX1013-NEXT: ; implicit-def: $vgpr16 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19 +; GFX1013-NEXT: ; implicit-def: $vgpr17_vgpr18_vgpr19_vgpr20 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1013-NEXT: s_cbranch_execnz .LBB8_1 ; GFX1013-NEXT: ; %bb.2: ; GFX1013-NEXT: s_mov_b32 exec_lo, s1 ; GFX1013-NEXT: s_waitcnt vmcnt(0) -; GFX1013-NEXT: v_mov_b32_e32 v0, v20 -; GFX1013-NEXT: v_mov_b32_e32 v1, v21 -; GFX1013-NEXT: v_mov_b32_e32 v2, v22 -; GFX1013-NEXT: v_mov_b32_e32 v3, v23 +; GFX1013-NEXT: v_mov_b32_e32 v0, v21 +; GFX1013-NEXT: v_mov_b32_e32 v1, v22 +; GFX1013-NEXT: v_mov_b32_e32 v2, v23 +; GFX1013-NEXT: v_mov_b32_e32 v3, v24 ; GFX1013-NEXT: ; return to shader part epilog %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) %r = bitcast <4 x i32> %v to <4 x float> @@ -367,42 +375,44 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) { ; GFX1030-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr: ; GFX1030: ; %bb.0: -; GFX1030-NEXT: v_mov_b32_e32 v14, v0 -; GFX1030-NEXT: v_mov_b32_e32 v15, v1 +; GFX1030-NEXT: v_mov_b32_e32 v21, v0 +; GFX1030-NEXT: v_mov_b32_e32 v22, v1 ; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v6 ; GFX1030-NEXT: v_and_b32_e32 v1, 0xffff, v8 -; GFX1030-NEXT: v_mov_b32_e32 v16, v2 +; GFX1030-NEXT: v_mov_b32_e32 v23, v2 ; GFX1030-NEXT: v_and_b32_e32 v2, 0xffff, v9 -; GFX1030-NEXT: v_mov_b32_e32 v17, v3 +; GFX1030-NEXT: v_mov_b32_e32 v24, v3 ; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mov_b32_e32 v18, v4 -; GFX1030-NEXT: v_mov_b32_e32 v19, v5 -; GFX1030-NEXT: v_alignbit_b32 v22, v2, v8, 16 -; GFX1030-NEXT: v_and_or_b32 v20, v6, 0xffff, v0 -; GFX1030-NEXT: v_and_or_b32 v21, v7, 0xffff, v1 +; GFX1030-NEXT: v_mov_b32_e32 v25, v4 +; GFX1030-NEXT: v_mov_b32_e32 v26, v5 +; GFX1030-NEXT: v_mov_b32_e32 v19, v12 +; GFX1030-NEXT: v_mov_b32_e32 v20, v13 +; GFX1030-NEXT: v_and_or_b32 v27, v6, 0xffff, v0 +; GFX1030-NEXT: v_and_or_b32 v28, v7, 0xffff, v1 +; GFX1030-NEXT: v_alignbit_b32 v29, v2, v8, 16 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo ; GFX1030-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GFX1030-NEXT: v_readfirstlane_b32 s4, v10 ; GFX1030-NEXT: v_readfirstlane_b32 s5, v11 -; GFX1030-NEXT: v_readfirstlane_b32 s6, v12 -; GFX1030-NEXT: v_readfirstlane_b32 s7, v13 +; GFX1030-NEXT: v_readfirstlane_b32 s6, v19 +; GFX1030-NEXT: v_readfirstlane_b32 s7, v20 ; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11] -; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13] +; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[19:20] ; GFX1030-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1030-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[14:29], s[4:7] a16 +; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[21:36], s[4:7] a16 ; GFX1030-NEXT: ; implicit-def: $vgpr10 -; GFX1030-NEXT: ; implicit-def: $vgpr14 -; GFX1030-NEXT: ; implicit-def: $vgpr15 -; GFX1030-NEXT: ; implicit-def: $vgpr16 -; GFX1030-NEXT: ; implicit-def: $vgpr17 -; GFX1030-NEXT: ; implicit-def: $vgpr18 -; GFX1030-NEXT: ; implicit-def: $vgpr19 -; GFX1030-NEXT: ; implicit-def: $vgpr20 ; GFX1030-NEXT: ; implicit-def: $vgpr21 ; GFX1030-NEXT: ; implicit-def: $vgpr22 -; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13 +; GFX1030-NEXT: ; implicit-def: $vgpr23 +; GFX1030-NEXT: ; implicit-def: $vgpr24 +; GFX1030-NEXT: ; implicit-def: $vgpr25 +; GFX1030-NEXT: ; implicit-def: $vgpr26 +; GFX1030-NEXT: ; implicit-def: $vgpr27 +; GFX1030-NEXT: ; implicit-def: $vgpr28 +; GFX1030-NEXT: ; implicit-def: $vgpr29 +; GFX1030-NEXT: ; implicit-def: $vgpr17_vgpr18_vgpr19_vgpr20 ; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1030-NEXT: s_cbranch_execnz .LBB9_1 ; GFX1030-NEXT: ; %bb.2: @@ -417,8 +427,8 @@ ; GFX1013-NEXT: v_lshrrev_b32_e32 v10, 16, v6 ; GFX1013-NEXT: v_and_b32_e32 v11, 0xffff, v8 ; GFX1013-NEXT: v_and_b32_e32 v9, 0xffff, v9 -; GFX1013-NEXT: v_mov_b32_e32 v18, v12 -; GFX1013-NEXT: v_mov_b32_e32 v19, v13 +; GFX1013-NEXT: v_mov_b32_e32 v19, v12 +; GFX1013-NEXT: v_mov_b32_e32 v20, v13 ; GFX1013-NEXT: v_lshlrev_b32_e32 v10, 16, v10 ; GFX1013-NEXT: v_lshlrev_b32_e32 v11, 16, v11 ; GFX1013-NEXT: v_alignbit_b32 v8, v9, v8, 16 @@ -428,26 +438,26 @@ ; GFX1013-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GFX1013-NEXT: v_readfirstlane_b32 s4, v16 ; GFX1013-NEXT: v_readfirstlane_b32 s5, v17 -; GFX1013-NEXT: v_readfirstlane_b32 s6, v18 -; GFX1013-NEXT: v_readfirstlane_b32 s7, v19 +; GFX1013-NEXT: v_readfirstlane_b32 s6, v19 +; GFX1013-NEXT: v_readfirstlane_b32 s7, v20 ; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17] -; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19] +; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[19:20] ; GFX1013-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX1013-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7] a16 +; GFX1013-NEXT: image_bvh64_intersect_ray v[21:24], v[0:15], s[4:7] a16 ; GFX1013-NEXT: ; implicit-def: $vgpr16 ; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 -; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19 +; GFX1013-NEXT: ; implicit-def: $vgpr17_vgpr18_vgpr19_vgpr20 ; GFX1013-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1013-NEXT: s_cbranch_execnz .LBB9_1 ; GFX1013-NEXT: ; %bb.2: ; GFX1013-NEXT: s_mov_b32 exec_lo, s1 ; GFX1013-NEXT: s_waitcnt vmcnt(0) -; GFX1013-NEXT: v_mov_b32_e32 v0, v20 -; GFX1013-NEXT: v_mov_b32_e32 v1, v21 -; GFX1013-NEXT: v_mov_b32_e32 v2, v22 -; GFX1013-NEXT: v_mov_b32_e32 v3, v23 +; GFX1013-NEXT: v_mov_b32_e32 v0, v21 +; GFX1013-NEXT: v_mov_b32_e32 v1, v22 +; GFX1013-NEXT: v_mov_b32_e32 v2, v23 +; GFX1013-NEXT: v_mov_b32_e32 v3, v24 ; GFX1013-NEXT: ; return to shader part epilog %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) %r = bitcast <4 x i32> %v to <4 x float> diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -2282,11 +2282,11 @@ ; SI-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 offset:2 ; SI-NEXT: buffer_load_ubyte v2, off, s[0:3], 0 offset:1 ; SI-NEXT: buffer_load_ubyte v3, off, s[0:3], 0 -; SI-NEXT: s_load_dword s0, s[0:1], 0x0 +; SI-NEXT: s_load_dword s4, s[0:1], 0x0 ; SI-NEXT: s_waitcnt vmcnt(3) ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_fma_f32 v0, s0, v0, 0.5 +; SI-NEXT: v_fma_f32 v0, s4, v0, 0.5 ; SI-NEXT: v_cvt_u32_f32_e32 v0, v0 ; SI-NEXT: s_waitcnt vmcnt(2) ; SI-NEXT: buffer_store_byte v1, off, s[0:3], 0 @@ -2309,11 +2309,11 @@ ; VI-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 offset:2 ; VI-NEXT: buffer_load_ubyte v2, off, s[0:3], 0 offset:1 ; VI-NEXT: buffer_load_ubyte v3, off, s[0:3], 0 -; VI-NEXT: s_load_dword s0, s[0:1], 0x0 +; VI-NEXT: s_load_dword s4, s[0:1], 0x0 ; VI-NEXT: s_waitcnt vmcnt(3) ; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_mul_f32_e32 v0, s0, v0 +; VI-NEXT: v_mul_f32_e32 v0, s4, v0 ; VI-NEXT: v_add_f32_e32 v0, 0.5, v0 ; VI-NEXT: v_cvt_i32_f32_e32 v0, v0 ; VI-NEXT: s_waitcnt vmcnt(2) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -330,6 +330,7 @@ ; GCN-O1-NEXT: SI optimize exec mask operations pre-RA ; GCN-O1-NEXT: Machine Natural Loop Construction ; GCN-O1-NEXT: Machine Block Frequency Analysis +; GCN-O1-NEXT: Live Interval Analysis ; GCN-O1-NEXT: Debug Variable Analysis ; GCN-O1-NEXT: Live Stack Slot Analysis ; GCN-O1-NEXT: Virtual Register Map @@ -617,6 +618,7 @@ ; GCN-O1-OPTS-NEXT: SI optimize exec mask operations pre-RA ; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction ; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis +; GCN-O1-OPTS-NEXT: Live Interval Analysis ; GCN-O1-OPTS-NEXT: Debug Variable Analysis ; GCN-O1-OPTS-NEXT: Live Stack Slot Analysis ; GCN-O1-OPTS-NEXT: Virtual Register Map @@ -903,6 +905,7 @@ ; GCN-O2-NEXT: Live Register Matrix ; GCN-O2-NEXT: SI Pre-allocate WWM Registers ; GCN-O2-NEXT: SI optimize exec mask operations pre-RA +; GCN-O2-NEXT: Live Interval Analysis ; GCN-O2-NEXT: SI Form memory clauses ; GCN-O2-NEXT: Machine Natural Loop Construction ; GCN-O2-NEXT: Machine Block Frequency Analysis @@ -1204,6 +1207,7 @@ ; GCN-O3-NEXT: Live Register Matrix ; GCN-O3-NEXT: SI Pre-allocate WWM Registers ; GCN-O3-NEXT: SI optimize exec mask operations pre-RA +; GCN-O3-NEXT: Live Interval Analysis ; GCN-O3-NEXT: SI Form memory clauses ; GCN-O3-NEXT: Machine Natural Loop Construction ; GCN-O3-NEXT: Machine Block Frequency Analysis diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -473,12 +473,12 @@ ; GCN-LABEL: s_test_sdiv24_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_ashr_i64 s[8:9], s[0:1], 40 +; GCN-NEXT: s_ashr_i64 s[8:9], s[8:9], 40 ; GCN-NEXT: v_cvt_f32_i32_e32 v0, s8 ; GCN-NEXT: s_mov_b32 s1, s5 ; GCN-NEXT: s_ashr_i64 s[4:5], s[6:7], 40 @@ -503,12 +503,12 @@ ; GCN-IR-LABEL: s_test_sdiv24_64: ; GCN-IR: ; %bb.0: ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s2, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_mov_b32 s0, s4 -; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[0:1], 40 +; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[8:9], 40 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s8 ; GCN-IR-NEXT: s_mov_b32 s1, s5 ; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[6:7], 40 @@ -643,12 +643,12 @@ ; GCN-LABEL: s_test_sdiv31_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_ashr_i64 s[8:9], s[0:1], 33 +; GCN-NEXT: s_ashr_i64 s[8:9], s[8:9], 33 ; GCN-NEXT: v_cvt_f32_i32_e32 v0, s8 ; GCN-NEXT: s_mov_b32 s1, s5 ; GCN-NEXT: s_ashr_i64 s[4:5], s[6:7], 33 @@ -673,12 +673,12 @@ ; GCN-IR-LABEL: s_test_sdiv31_64: ; GCN-IR: ; %bb.0: ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s2, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_mov_b32 s0, s4 -; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[0:1], 33 +; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[8:9], 33 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s8 ; GCN-IR-NEXT: s_mov_b32 s1, s5 ; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[6:7], 33 @@ -710,12 +710,12 @@ ; GCN-LABEL: s_test_sdiv23_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_ashr_i64 s[8:9], s[0:1], 41 +; GCN-NEXT: s_ashr_i64 s[8:9], s[8:9], 41 ; GCN-NEXT: v_cvt_f32_i32_e32 v0, s8 ; GCN-NEXT: s_mov_b32 s1, s5 ; GCN-NEXT: s_ashr_i64 s[4:5], s[6:7], 41 @@ -740,12 +740,12 @@ ; GCN-IR-LABEL: s_test_sdiv23_64: ; GCN-IR: ; %bb.0: ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s2, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_mov_b32 s0, s4 -; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[0:1], 41 +; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[8:9], 41 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s8 ; GCN-IR-NEXT: s_mov_b32 s1, s5 ; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[6:7], 41 @@ -777,12 +777,12 @@ ; GCN-LABEL: s_test_sdiv25_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s0, s4 -; GCN-NEXT: s_ashr_i64 s[8:9], s[0:1], 39 +; GCN-NEXT: s_ashr_i64 s[8:9], s[8:9], 39 ; GCN-NEXT: v_cvt_f32_i32_e32 v0, s8 ; GCN-NEXT: s_mov_b32 s1, s5 ; GCN-NEXT: s_ashr_i64 s[4:5], s[6:7], 39 @@ -807,12 +807,12 @@ ; GCN-IR-LABEL: s_test_sdiv25_64: ; GCN-IR: ; %bb.0: ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s2, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) ; GCN-IR-NEXT: s_mov_b32 s0, s4 -; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[0:1], 39 +; GCN-IR-NEXT: s_ashr_i64 s[8:9], s[8:9], 39 ; GCN-IR-NEXT: v_cvt_f32_i32_e32 v0, s8 ; GCN-IR-NEXT: s_mov_b32 s1, s5 ; GCN-IR-NEXT: s_ashr_i64 s[4:5], s[6:7], 39 diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir --- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir +++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir @@ -29,8 +29,8 @@ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc - ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[COPY]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: dead %0:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[COPY]], implicit-def dead $scc + ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec ; GCN: S_BRANCH %bb.1 ; GCN: bb.1: @@ -66,7 +66,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: S_BRANCH %bb.2 @@ -111,7 +111,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: S_BRANCH %bb.2 @@ -167,7 +167,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: S_BRANCH %bb.2 @@ -223,7 +223,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: S_BRANCH %bb.2 @@ -276,7 +276,7 @@ ; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: S_BRANCH %bb.2 @@ -343,7 +343,7 @@ ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc ; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY4]], implicit-def dead $scc - ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] + ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]] ; GCN: dead %8:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec ; GCN: bb.2: diff --git a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll --- a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll +++ b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll @@ -54,8 +54,8 @@ ; CHECK-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 ; CHECK-NEXT: s_waitcnt expcnt(0) -; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v2 ; CHECK-NEXT: .LBB1_1: ; %bb9 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]