Index: llvm/lib/Target/AMDGPU/SOPInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SOPInstructions.td +++ llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -412,8 +412,14 @@ class UniformBinFrag : PatFrag < (ops node:$src0, node:$src1), (Op $src0, $src1), - [{ return !N->isDivergent(); }] ->; + [{ return !N->isDivergent(); }]> { + // This check is unnecessary as it's captured by the result register + // bank constraint. + // + // FIXME: Should add a way for the emitter to recognize this is a + // trivially true predicate to eliminate the check. + let GISelPredicateCode = [{return true;}]; +} let Defs = [SCC] in { // Carry out goes to SCC let isCommutable = 1 in { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -979,7 +979,7 @@ ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_add_u32 m0, s18, -1 +; GPRIDX-NEXT: s_add_i32 m0, s18, -1 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 @@ -1001,7 +1001,7 @@ ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_add_u32 m0, s18, -1 +; MOVREL-NEXT: s_add_i32 m0, s18, -1 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 @@ -1031,7 +1031,7 @@ ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec ; GPRIDX-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16 -; GPRIDX-NEXT: s_add_u32 s7, s6, 3 +; GPRIDX-NEXT: s_add_i32 s7, s6, 3 ; GPRIDX-NEXT: s_lshl_b32 s7, s7, 1 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 ; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) @@ -1056,7 +1056,7 @@ ; MOVREL-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s6, v16 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 -; MOVREL-NEXT: s_add_u32 s6, s6, 3 +; MOVREL-NEXT: s_add_i32 s6, s6, 3 ; MOVREL-NEXT: s_lshl_b32 m0, s6, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 ; MOVREL-NEXT: v_movrels_b32_e32 v18, v1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -2093,7 +2093,7 @@ ; GPRIDX-NEXT: s_mov_b64 s[0:1], exec ; GPRIDX-NEXT: BB32_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v18 -; GPRIDX-NEXT: s_add_u32 s3, s2, 1 +; GPRIDX-NEXT: s_add_i32 s3, s2, 1 ; GPRIDX-NEXT: s_lshl_b32 s3, s3, 1 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v18 ; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST) @@ -2139,7 +2139,7 @@ ; MOVREL-NEXT: v_mov_b32_e32 v19, v0 ; MOVREL-NEXT: v_mov_b32_e32 v33, v14 ; MOVREL-NEXT: v_mov_b32_e32 v32, v13 -; MOVREL-NEXT: s_add_u32 s2, s1, 1 +; MOVREL-NEXT: s_add_i32 s2, s1, 1 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v18 ; MOVREL-NEXT: v_mov_b32_e32 v31, v12 ; MOVREL-NEXT: v_mov_b32_e32 v30, v11 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir @@ -17,20 +17,20 @@ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec - ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_U32_]], %7, 0, implicit $exec + ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec + ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_I32_]], %7, 0, implicit $exec ; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_I32_e64 %8, [[COPY2]], 0, implicit $exec - ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit %7, implicit %8, implicit %9 + ; GFX6: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit %7, implicit %8, implicit %9 ; GFX9-LABEL: name: add_s32 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec - ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_U32_]], [[V_ADD_U32_e64_]], 0, implicit $exec + ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec + ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec ; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]] + ; GFX9: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:vgpr(s32) = COPY $vgpr0 @@ -123,14 +123,14 @@ ; GFX6: liveins: $sgpr0 ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_ADD_I32_]] ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_s ; GFX9: liveins: $sgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_ADD_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 16 %2:sgpr(s32) = G_ADD %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir @@ -123,8 +123,8 @@ ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; CHECK: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def $scc - ; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_BCNT1_I32_B32_]], [[COPY1]], implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BCNT1_I32_B32_]], [[COPY1]], implicit-def $scc + ; CHECK: S_ENDPGM 0, implicit [[S_ADD_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = G_CTPOP %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir @@ -316,16 +316,16 @@ ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_I32_]] ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_I32_]] ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 @@ -378,16 +378,16 @@ ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_I32_]] ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_I32_]] ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 @@ -469,16 +469,16 @@ ; MOVREL: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_I32_]] ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] ; GPRIDX-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1 ; GPRIDX: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_I32_]] ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 @@ -700,16 +700,16 @@ ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_I32_]] ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 1, implicit-def $m0, implicit $m0 ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] @@ -764,16 +764,16 @@ ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_I32_]] ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8 ; GPRIDX: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 1, implicit-def $m0, implicit $m0 ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir @@ -534,8 +534,8 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_I32_]] ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_8 @@ -543,8 +543,8 @@ ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 8, implicit-def $m0, implicit $m0 + ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 8, implicit-def $m0, implicit $m0 ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] @@ -603,8 +603,8 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_I32_]] ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32_add_8 @@ -612,8 +612,8 @@ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_I32_]] ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0 ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir @@ -18,26 +18,26 @@ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[COPY2]], implicit-def $scc - ; GFX8: S_ENDPGM 0, implicit [[S_ADD_U32_1]] + ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def $scc + ; GFX8: S_ENDPGM 0, implicit [[S_ADD_I32_1]] ; GFX9-LABEL: name: add_s32_sgpr_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[COPY2]], implicit-def $scc - ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_1]] + ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_ADD_I32_1]] ; GFX10-LABEL: name: add_s32_sgpr_sgpr_sgpr ; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX10: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX10: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[COPY2]], implicit-def $scc - ; GFX10: S_ENDPGM 0, implicit [[S_ADD_U32_1]] + ; GFX10: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def $scc + ; GFX10: S_ENDPGM 0, implicit [[S_ADD_I32_1]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = COPY $sgpr2 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-smulh.mir @@ -1,8 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s -# RUN: FileCheck -check-prefix=ERR %s < %t -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=SI %s # RUN: FileCheck -check-prefix=ERR %s < %t +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # ERR-NOT: remark: # ERR: remark: :0:0: cannot select: %2:sgpr(s32) = G_SMULH %0:sgpr, %1:sgpr (in function: smulh_s32_ss) @@ -16,11 +15,17 @@ body: | bb.0: liveins: $sgpr0, $sgpr1 - ; GCN-LABEL: name: smulh_s32_ss - ; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GCN: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] - ; GCN: S_ENDPGM 0, implicit [[SMULH]](s32) + + ; SI-LABEL: name: smulh_s32_ss + ; SI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; SI: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; SI: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]] + ; SI: S_ENDPGM 0, implicit [[SMULH]](s32) + ; GFX9-LABEL: name: smulh_s32_ss + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_MUL_HI_I32_:%[0-9]+]]:sreg_32 = S_MUL_HI_I32 [[COPY]], [[COPY1]] + ; GFX9: S_ENDPGM 0, implicit [[S_MUL_HI_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = G_SMULH %0, %1 @@ -35,11 +40,17 @@ body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GCN-LABEL: name: smulh_s32_sv - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]] + + ; SI-LABEL: name: smulh_s32_sv + ; SI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec + ; SI: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]] + ; GFX9-LABEL: name: smulh_s32_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = G_SMULH %0, %1 @@ -54,11 +65,17 @@ body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GCN-LABEL: name: smulh_s32_vs - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]] + + ; SI-LABEL: name: smulh_s32_vs + ; SI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec + ; SI: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]] + ; GFX9-LABEL: name: smulh_s32_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s32) = G_SMULH %0, %1 @@ -73,11 +90,17 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 - ; GCN-LABEL: name: smulh_s32_vv - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]] + + ; SI-LABEL: name: smulh_s32_vv + ; SI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; SI: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec + ; SI: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]] + ; GFX9-LABEL: name: smulh_s32_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_MUL_HI_I32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MUL_HI_I32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_SMULH %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir @@ -20,9 +20,9 @@ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_SUB_I32_e64 [[COPY2]], [[S_SUB_U32_]], 0, implicit $exec - ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_SUB_I32_e64 [[S_SUB_U32_]], %7, 0, implicit $exec + ; GFX6: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_SUB_I32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_SUB_I32_e64 [[S_SUB_I32_]], %7, 0, implicit $exec ; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_SUB_I32_e64 %8, [[COPY2]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit %9 ; GFX9-LABEL: name: sub_s32 @@ -30,9 +30,9 @@ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_U32_]], 0, implicit $exec - ; GFX9: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_U32_]], [[V_SUB_U32_e64_]], 0, implicit $exec + ; GFX9: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX9: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_I32_]], [[V_SUB_U32_e64_]], 0, implicit $exec ; GFX9: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[COPY2]], 0, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_SUB_U32_e64_2]] ; GFX10-LABEL: name: sub_s32 @@ -41,9 +41,9 @@ ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_U32_]], 0, implicit $exec - ; GFX10: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_U32_]], [[V_SUB_U32_e64_]], 0, implicit $exec + ; GFX10: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX10: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_I32_]], [[V_SUB_U32_e64_]], 0, implicit $exec ; GFX10: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[COPY2]], 0, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_SUB_U32_e64_2]] %0:sgpr(s32) = COPY $sgpr0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-umulh.mir @@ -1,8 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s -# RUN: FileCheck -check-prefix=ERR %s < %t -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=SI %s # RUN: FileCheck -check-prefix=ERR %s < %t +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s # ERR-NOT: remark: # ERR: remark: :0:0: cannot select: %2:sgpr(s32) = G_UMULH %0:sgpr, %1:sgpr (in function: umulh_s32_ss) @@ -16,11 +15,17 @@ body: | bb.0: liveins: $sgpr0, $sgpr1 - ; GCN-LABEL: name: umulh_s32_ss - ; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GCN: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GCN: S_ENDPGM 0, implicit [[UMULH]](s32) + + ; SI-LABEL: name: umulh_s32_ss + ; SI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; SI: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; SI: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] + ; SI: S_ENDPGM 0, implicit [[UMULH]](s32) + ; GFX9-LABEL: name: umulh_s32_ss + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY]], [[COPY1]] + ; GFX9: S_ENDPGM 0, implicit [[S_MUL_HI_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = G_UMULH %0, %1 @@ -35,11 +40,17 @@ body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GCN-LABEL: name: umulh_s32_sv - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]] + + ; SI-LABEL: name: umulh_s32_sv + ; SI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec + ; SI: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]] + ; GFX9-LABEL: name: umulh_s32_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = G_UMULH %0, %1 @@ -54,11 +65,17 @@ body: | bb.0: liveins: $sgpr0, $vgpr0 - ; GCN-LABEL: name: umulh_s32_vs - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]] + + ; SI-LABEL: name: umulh_s32_vs + ; SI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; SI: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec + ; SI: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]] + ; GFX9-LABEL: name: umulh_s32_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s32) = G_UMULH %0, %1 @@ -73,11 +90,17 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 - ; GCN-LABEL: name: umulh_s32_vv - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]] + + ; SI-LABEL: name: umulh_s32_vv + ; SI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; SI: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec + ; SI: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]] + ; GFX9-LABEL: name: umulh_s32_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY]], [[COPY1]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MUL_HI_U32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 %2:vgpr(s32) = G_UMULH %0, %1