Index: llvm/lib/Target/AMDGPU/SOPInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SOPInstructions.td +++ llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -413,7 +413,9 @@ (ops node:$src0, node:$src1), (Op $src0, $src1), [{ return !N->isDivergent(); }] ->; +> { + let GISelPredicateCode = [{return true;}]; +} let Defs = [SCC] in { // Carry out goes to SCC let isCommutable = 1 in { @@ -535,22 +537,22 @@ let Defs = [SCC] in { // TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3 def S_LSHL_B32 : SOP2_32 <"s_lshl_b32", - [(set SReg_32:$sdst, (shl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_32:$sdst, (UniformBinFrag (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64", - [(set SReg_64:$sdst, (shl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_64:$sdst, (UniformBinFrag (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHR_B32 : SOP2_32 <"s_lshr_b32", - [(set SReg_32:$sdst, (srl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_32:$sdst, (UniformBinFrag (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64", - [(set SReg_64:$sdst, (srl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_64:$sdst, (UniformBinFrag (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; def S_ASHR_I32 : SOP2_32 <"s_ashr_i32", - [(set SReg_32:$sdst, (sra (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_32:$sdst, (UniformBinFrag (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] >; def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64", - [(set SReg_64:$sdst, (sra (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_64:$sdst, (UniformBinFrag (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; } // End Defs = [SCC] Index: llvm/lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ 
llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -541,14 +541,17 @@ defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>; } // End SubtargetPredicate = isGFX6GFX7 -let SubtargetPredicate = isGFX6GFX7GFX10 in { let isCommutable = 1 in { +let SubtargetPredicate = isGFX6GFX7GFX10 in { defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>; -defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32, srl>; -defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32, sra>; -defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32, shl>; -} // End isCommutable = 1 } // End SubtargetPredicate = isGFX6GFX7GFX10 +let SubtargetPredicate = isGFX6GFX7 in { +defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN, srl>; +defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN, sra>; +defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN, shl>; +} // End SubtargetPredicate = isGFX6GFX7 +} // End isCommutable = 1 + class DivergentBinOp : GCNPat< Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -385,10 +385,12 @@ } let SchedRW = [Write64Bit] in { -let SubtargetPredicate = isGFX6GFX7GFX10 in { +let SubtargetPredicate = isGFX6GFX7 in { def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile, shl>; def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile, srl>; def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile, sra>; +} // End SubtargetPredicate = isGFX6GFX7 +let SubtargetPredicate = isGFX6GFX7GFX10 in { def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile>; } // End SubtargetPredicate = isGFX6GFX7GFX10 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -979,7 +979,7 @@ ; GPRIDX: 
; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 ; GPRIDX-NEXT: s_mov_b32 s1, s3 -; GPRIDX-NEXT: s_add_u32 m0, s18, -1 +; GPRIDX-NEXT: s_add_i32 m0, s18, -1 ; GPRIDX-NEXT: s_mov_b32 s2, s4 ; GPRIDX-NEXT: s_mov_b32 s3, s5 ; GPRIDX-NEXT: s_mov_b32 s4, s6 @@ -1001,7 +1001,7 @@ ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s0, s2 ; MOVREL-NEXT: s_mov_b32 s1, s3 -; MOVREL-NEXT: s_add_u32 m0, s18, -1 +; MOVREL-NEXT: s_add_i32 m0, s18, -1 ; MOVREL-NEXT: s_mov_b32 s2, s4 ; MOVREL-NEXT: s_mov_b32 s3, s5 ; MOVREL-NEXT: s_mov_b32 s4, s6 @@ -1031,7 +1031,7 @@ ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec ; GPRIDX-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16 -; GPRIDX-NEXT: s_add_u32 s7, s6, 3 +; GPRIDX-NEXT: s_add_i32 s7, s6, 3 ; GPRIDX-NEXT: s_lshl_b32 s7, s7, 1 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 ; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) @@ -1056,7 +1056,7 @@ ; MOVREL-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s6, v16 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 -; MOVREL-NEXT: s_add_u32 s6, s6, 3 +; MOVREL-NEXT: s_add_i32 s6, s6, 3 ; MOVREL-NEXT: s_lshl_b32 m0, s6, 1 ; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 ; MOVREL-NEXT: v_movrels_b32_e32 v18, v1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -2093,7 +2093,7 @@ ; GPRIDX-NEXT: s_mov_b64 s[0:1], exec ; GPRIDX-NEXT: BB32_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s2, v18 -; GPRIDX-NEXT: s_add_u32 s3, s2, 1 +; GPRIDX-NEXT: s_add_i32 s3, s2, 1 ; GPRIDX-NEXT: s_lshl_b32 s3, s3, 1 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v18 ; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST) @@ -2139,7 +2139,7 @@ ; MOVREL-NEXT: v_mov_b32_e32 v19, v0 ; MOVREL-NEXT: v_mov_b32_e32 v33, v14 ; MOVREL-NEXT: v_mov_b32_e32 
v32, v13 -; MOVREL-NEXT: s_add_u32 s2, s1, 1 +; MOVREL-NEXT: s_add_i32 s2, s1, 1 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v18 ; MOVREL-NEXT: v_mov_b32_e32 v31, v12 ; MOVREL-NEXT: v_mov_b32_e32 v30, v11 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- @@ -17,20 +16,29 @@ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec - ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_U32_]], %7, 0, implicit $exec + ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec + ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_I32_]], %7, 0, implicit $exec ; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_I32_e64 %8, [[COPY2]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit %7, implicit %8, 
implicit %9 + ; GFX8-LABEL: name: add_s32 + ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec + ; GFX8: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_I32_]], %7, 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_I32_e64 %8, [[COPY2]], 0, implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit %7, implicit %8, implicit %9 ; GFX9-LABEL: name: add_s32 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec - ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_U32_]], [[V_ADD_U32_e64_]], 0, implicit $exec + ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec + ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec ; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]] + ; GFX9: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:vgpr(s32) = COPY $vgpr0 @@ -69,6 +77,11 @@ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; 
GFX6: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc ; GFX6: S_ENDPGM 0, implicit [[S_SUB_I32_]] + ; GFX8-LABEL: name: add_neg_inline_const_64_to_sub_s32_s + ; GFX8: liveins: $sgpr0 + ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX8: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc + ; GFX8: S_ENDPGM 0, implicit [[S_SUB_I32_]] ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_s ; GFX9: liveins: $sgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 @@ -97,6 +110,12 @@ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967232, implicit $exec ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit %2 + ; GFX8-LABEL: name: add_neg_inline_const_64_to_sub_s32_v + ; GFX8: liveins: $vgpr0 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967232, implicit $exec + ; GFX8: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX8: S_ENDPGM 0, implicit %2 ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_v ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 @@ -125,12 +144,18 @@ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; GFX8-LABEL: name: add_neg_inline_const_16_to_sub_s32_s + ; GFX8: liveins: $sgpr0 + ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8: S_ENDPGM 0, implicit [[S_ADD_I32_]] ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_s ; GFX9: liveins: $sgpr0 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX9: 
[[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc - ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_ADD_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_CONSTANT i32 16 %2:sgpr(s32) = G_ADD %0, %1 @@ -154,6 +179,12 @@ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit %2 + ; GFX8-LABEL: name: add_neg_inline_const_16_to_sub_s32_v + ; GFX8: liveins: $vgpr0 + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec + ; GFX8: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX8: S_ENDPGM 0, implicit %2 ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_v ; GFX9: liveins: $vgpr0 ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir @@ -237,8 +237,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[V_ASHR_I64_:%[0-9]+]]:vreg_64 = V_ASHR_I64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_ASHR_I64_]] + ; GFX10: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s64) = G_ASHR %0, %1 @@ -277,8 +277,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10: [[V_ASHR_I64_:%[0-9]+]]:vreg_64 = V_ASHR_I64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_ASHR_I64_]] + ; GFX10: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s64) = G_ASHR %0, %1 @@ -317,8 +317,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_ASHR_I64_:%[0-9]+]]:vreg_64 = V_ASHR_I64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_ASHR_I64_]] + ; GFX10: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_ASHR %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir @@ -123,8 +123,8 @@ ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; CHECK: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def $scc - ; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_BCNT1_I32_B32_]], [[COPY1]], implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BCNT1_I32_B32_]], [[COPY1]], implicit-def $scc + ; CHECK: S_ENDPGM 0, implicit [[S_ADD_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = G_CTPOP %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir =================================================================== --- 
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL %s -# RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-vgpr-index-mode -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL_GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MOVREL_GFX8 %s +# RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-vgpr-index-mode -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX_GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GPRIDX_GFX9 %s --- name: extract_vector_elt_s_s32_v2s32 @@ -25,6 +25,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s32_v2s32 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit 
[[S_MOVRELS_B32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s32_v2s32 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s32_v2s32 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s32_v2s32 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 %1:sgpr(s32) = COPY $sgpr2 %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -52,6 +76,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s32_v3s32 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_96 = COPY $sgpr0_sgpr1_sgpr2 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s32_v3s32 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_96 
= COPY $sgpr0_sgpr1_sgpr2 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s32_v3s32 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_96 = COPY $sgpr0_sgpr1_sgpr2 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s32_v3s32 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_96 = COPY $sgpr0_sgpr1_sgpr2 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 %1:sgpr(s32) = COPY $sgpr2 %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -79,6 +127,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s32_v4s32 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s32_v4s32 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY 
$sgpr4 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s32_v4s32 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s32_v4s32 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr4 %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -106,6 +178,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s32_v8s32 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s32_v8s32 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; 
MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s32_v8s32 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s32_v8s32 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -133,6 +229,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s32_v16s32 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s32_v16s32 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_512 = 
COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s32_v16s32 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s32_v16s32 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -160,6 +280,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s32_v32s32 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_1024 = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s32_v32s32 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s32_v32s32 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s32_v32s32 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 
+ ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s32) = COPY $sgpr40 %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -187,6 +331,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s64_v2s64 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s64_v2s64 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s64_v2s64 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit 
[[S_MOVRELS_B64_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s64_v2s64 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<2 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:sgpr(s32) = COPY $sgpr4 %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -214,6 +382,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s64_v4s64 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s64_v4s64 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s64_v4s64 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + 
; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s64_v4s64 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -241,6 +433,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s64_v8s64 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s64_v8s64 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s64_v8s64 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_512 = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s64_v8s64 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -268,6 +484,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s64_v16s64 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s64_v16s64 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_1024 = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s64_v16s64 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s64_v16s64 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_1024 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<16 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 %1:sgpr(s32) = COPY $sgpr40 %2:sgpr(s64) = G_EXTRACT_VECTOR_ELT %0, 
%1 @@ -295,6 +535,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_1 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 1 @@ -316,7 +580,7 @@ ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc ; MOVREL: $m0 = COPY [[S_ADD_U32_]] ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] @@ -328,6 +592,38 @@ ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; MOVREL_GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL_GFX6: $m0 = COPY [[S_ADD_I32_]] + ; MOVREL_GFX6: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; MOVREL_GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL_GFX8: $m0 = COPY 
[[S_ADD_I32_]] + ; MOVREL_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; GPRIDX_GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX_GFX8: $m0 = COPY [[S_ADD_I32_]] + ; GPRIDX_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_m1 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; GPRIDX_GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX_GFX9: $m0 = COPY [[S_ADD_I32_]] + ; GPRIDX_GFX9: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 -1 @@ -357,6 +653,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY 
$sgpr8 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_7 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub7, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 7 @@ -378,7 +698,7 @@ ; MOVREL: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: 
[[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc ; MOVREL: $m0 = COPY [[S_ADD_U32_]] ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] @@ -390,6 +710,38 @@ ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 + ; MOVREL_GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL_GFX6: $m0 = COPY [[S_ADD_I32_]] + ; MOVREL_GFX6: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 + ; MOVREL_GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL_GFX8: $m0 = COPY [[S_ADD_I32_]] + ; MOVREL_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = 
S_MOV_B32 8 + ; GPRIDX_GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX_GFX8: $m0 = COPY [[S_ADD_I32_]] + ; GPRIDX_GFX8: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s32_v8s32_idx_offset_8 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 + ; GPRIDX_GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX_GFX9: $m0 = COPY [[S_ADD_I32_]] + ; GPRIDX_GFX9: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 8 @@ -419,6 +771,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_512 = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_1 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub2_sub3, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 1 @@ -448,6 +824,30 @@ ; GPRIDX: $m0 = COPY [[COPY1]] ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 
+ ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_2 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: $m0 = COPY [[COPY1]] + ; GPRIDX_GFX9: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub4_sub5, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 2 @@ -469,7 +869,7 @@ ; MOVREL: 
[[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc ; MOVREL: $m0 = COPY [[S_ADD_U32_]] ; MOVREL: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] @@ -481,6 +881,38 @@ ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] ; GPRIDX: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; MOVREL_GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL_GFX6: $m0 = COPY [[S_ADD_I32_]] + ; MOVREL_GFX6: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; MOVREL_GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], 
implicit-def $scc + ; MOVREL_GFX8: $m0 = COPY [[S_ADD_I32_]] + ; MOVREL_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; GPRIDX_GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX_GFX8: $m0 = COPY [[S_ADD_I32_]] + ; GPRIDX_GFX8: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_s_s64_v8s64_idx_offset_m1 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; GPRIDX_GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX_GFX9: $m0 = COPY [[S_ADD_I32_]] + ; GPRIDX_GFX9: [[S_MOVRELS_B64_:%[0-9]+]]:sreg_64 = S_MOVRELS_B64 [[COPY]].sub0_sub1, implicit $m0, implicit [[COPY]] + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[S_MOVRELS_B64_]] %0:sgpr(<8 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 -1 @@ -511,6 +943,32 @@ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit 
[[V_MOV_B32_e32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_v_s32_v2s32 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_v_s32_v2s32 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_v_s32_v2s32 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GPRIDX_GFX8: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX8: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_v_s32_v2s32 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GPRIDX_GFX9: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX9: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr2 %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -539,6 +997,32 @@ ; GPRIDX: 
[[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_v_s32_v3s32 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_v_s32_v3s32 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_v_s32_v3s32 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GPRIDX_GFX8: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX8: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_v_s32_v3s32 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:vreg_96 = COPY $vgpr0_vgpr1_vgpr2 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GPRIDX_GFX9: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX9: S_SET_GPR_IDX_OFF + ; 
GPRIDX_GFX9: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] %0:vgpr(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:sgpr(s32) = COPY $sgpr2 %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -567,6 +1051,32 @@ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_v_s32_v4s32 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_v_s32_v4s32 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_v_s32_v4s32 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GPRIDX_GFX8: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX8: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_v_s32_v4s32 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GPRIDX_GFX9: S_SET_GPR_IDX_ON 
[[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX9: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] %0:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:sgpr(s32) = COPY $sgpr4 %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -595,6 +1105,32 @@ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_v_s32_v8s32 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_v_s32_v8s32 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_v_s32_v8s32 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX8: 
S_SET_GPR_IDX_OFF + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_v_s32_v8s32 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX9: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -623,6 +1159,32 @@ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_v_s32_v16s32 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_v_s32_v16s32 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit 
[[V_MOVRELS_B32_e32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_v_s32_v16s32 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX8: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_v_s32_v16s32 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:vreg_512 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX9: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] %0:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:sgpr(s32) = COPY $sgpr8 %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -651,6 +1213,32 @@ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_v_s32_v32s32 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; 
MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_v_s32_v32s32 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_v_s32_v32s32 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; GPRIDX_GFX8: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX8: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_v_s32_v32s32 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:vreg_1024 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr40 + ; GPRIDX_GFX9: S_SET_GPR_IDX_ON [[COPY1]], 1, 
implicit-def $m0, implicit $m0 + ; GPRIDX_GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX9: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] %0:vgpr(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 %1:sgpr(s32) = COPY $sgpr40 %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -679,6 +1267,32 @@ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub1, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub1, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub1, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: 
S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub1, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX8: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_1 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub1, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX9: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 1 @@ -700,7 +1314,7 @@ ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc ; MOVREL: $m0 = COPY [[S_ADD_U32_]] ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] @@ -713,6 +1327,40 @@ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:vreg_256 = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; MOVREL_GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL_GFX6: $m0 = COPY [[S_ADD_I32_]] + ; MOVREL_GFX6: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; MOVREL_GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL_GFX8: $m0 = COPY [[S_ADD_I32_]] + ; MOVREL_GFX8: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; GPRIDX_GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX_GFX8: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX8: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_m1 + ; GPRIDX_GFX9: 
[[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; GPRIDX_GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX_GFX9: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX9: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 -1 @@ -743,6 +1391,32 @@ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub7, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: $m0 = COPY [[COPY1]] + ; MOVREL_GFX6: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub7, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: $m0 = COPY [[COPY1]] + ; MOVREL_GFX8: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub7, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX_GFX8-LABEL: name: 
extract_vector_elt_v_s32_v8s32_idx_offset_7 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub7, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX8: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_7 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: S_SET_GPR_IDX_ON [[COPY1]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub7, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX9: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 7 @@ -764,7 +1438,7 @@ ; MOVREL: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc ; MOVREL: $m0 = COPY [[S_ADD_U32_]] ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] @@ -777,6 +1451,40 @@ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 ; GPRIDX: 
S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; MOVREL_GFX6-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8 + ; MOVREL_GFX6: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL_GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 + ; MOVREL_GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL_GFX6: $m0 = COPY [[S_ADD_I32_]] + ; MOVREL_GFX6: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX6: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; MOVREL_GFX8-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8 + ; MOVREL_GFX8: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; MOVREL_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; MOVREL_GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 + ; MOVREL_GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL_GFX8: $m0 = COPY [[S_ADD_I32_]] + ; MOVREL_GFX8: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]] + ; MOVREL_GFX8: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]] + ; GPRIDX_GFX8-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8 + ; GPRIDX_GFX8: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX_GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 + ; GPRIDX_GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX_GFX8: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX8: 
S_SET_GPR_IDX_OFF + ; GPRIDX_GFX8: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] + ; GPRIDX_GFX9-LABEL: name: extract_vector_elt_v_s32_v8s32_idx_offset_8 + ; GPRIDX_GFX9: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; GPRIDX_GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 + ; GPRIDX_GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 + ; GPRIDX_GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX_GFX9: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 1, implicit-def $m0, implicit $m0 + ; GPRIDX_GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0 + ; GPRIDX_GFX9: S_SET_GPR_IDX_OFF + ; GPRIDX_GFX9: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]] %0:vgpr(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:sgpr(s32) = COPY $sgpr8 %2:sgpr(s32) = G_CONSTANT i32 8 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir @@ -534,8 +534,8 @@ ; MOVREL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_I32_]] ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_vvs_s32_v8s32_add_8 @@ -543,8 +543,8 @@ ; GPRIDX: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 
; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_U32_]], 8, implicit-def $m0, implicit $m0 + ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: S_SET_GPR_IDX_ON [[S_ADD_I32_]], 8, implicit-def $m0, implicit $m0 ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:vreg_256 = V_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec ; GPRIDX: S_SET_GPR_IDX_OFF ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V8_]] @@ -603,8 +603,8 @@ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; MOVREL: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; MOVREL: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; MOVREL: $m0 = COPY [[S_ADD_U32_]] + ; MOVREL: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; MOVREL: $m0 = COPY [[S_ADD_I32_]] ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0 ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v8s32_add_8 @@ -612,8 +612,8 @@ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8 ; GPRIDX: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr9 ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 - ; GPRIDX: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GPRIDX: $m0 = COPY [[S_ADD_U32_]] + ; GPRIDX: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc + ; GPRIDX: $m0 = COPY [[S_ADD_I32_]] ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V8_:%[0-9]+]]:sreg_256 = S_INDIRECT_REG_WRITE_B32_V8 [[COPY]], [[COPY1]], 1, implicit $m0 ; GPRIDX: S_ENDPGM 0, 
implicit [[S_INDIRECT_REG_WRITE_B32_V8_]] %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir @@ -237,8 +237,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[V_LSHR_B64_:%[0-9]+]]:vreg_64 = V_LSHR_B64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_LSHR_B64_]] + ; GFX10: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s64) = G_LSHR %0, %1 @@ -277,8 +277,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10: [[V_LSHR_B64_:%[0-9]+]]:vreg_64 = V_LSHR_B64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_LSHR_B64_]] + ; GFX10: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s64) = G_LSHR %0, %1 @@ -317,8 +317,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_LSHR_B64_:%[0-9]+]]:vreg_64 = V_LSHR_B64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_LSHR_B64_]] + ; GFX10: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_LSHR %0, %1 Index: 
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-add3.mir @@ -18,26 +18,26 @@ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX8: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[COPY2]], implicit-def $scc - ; GFX8: S_ENDPGM 0, implicit [[S_ADD_U32_1]] + ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def $scc + ; GFX8: S_ENDPGM 0, implicit [[S_ADD_I32_1]] ; GFX9-LABEL: name: add_s32_sgpr_sgpr_sgpr ; GFX9: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[COPY2]], implicit-def $scc - ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_1]] + ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_ADD_I32_1]] ; GFX10-LABEL: name: add_s32_sgpr_sgpr_sgpr ; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX10: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX10: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10: 
[[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_ADD_U32_]], [[COPY2]], implicit-def $scc - ; GFX10: S_ENDPGM 0, implicit [[S_ADD_U32_1]] + ; GFX10: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], [[COPY2]], implicit-def $scc + ; GFX10: S_ENDPGM 0, implicit [[S_ADD_I32_1]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s32) = COPY $sgpr2 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir @@ -237,8 +237,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[V_LSHL_B64_:%[0-9]+]]:vreg_64 = V_LSHL_B64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_LSHL_B64_]] + ; GFX10: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s64) = G_SHL %0, %1 @@ -277,8 +277,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX10: [[V_LSHL_B64_:%[0-9]+]]:vreg_64 = V_LSHL_B64 [[COPY]], [[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_LSHL_B64_]] + ; GFX10: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 %2:vgpr(s64) = G_SHL %0, %1 @@ -317,8 +317,8 @@ ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_LSHL_B64_:%[0-9]+]]:vreg_64 = V_LSHL_B64 [[COPY]], 
[[COPY1]], implicit $exec - ; GFX10: S_ENDPGM 0, implicit [[V_LSHL_B64_]] + ; GFX10: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_SHL %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sub.mir @@ -1,5 +1,4 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s @@ -20,9 +19,9 @@ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_SUB_I32_e64 [[COPY2]], [[S_SUB_U32_]], 0, implicit $exec - ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_SUB_I32_e64 [[S_SUB_U32_]], %7, 0, implicit $exec + ; GFX6: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_SUB_I32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_SUB_I32_e64 [[S_SUB_I32_]], %7, 0, implicit $exec ; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_SUB_I32_e64 %8, 
[[COPY2]], 0, implicit $exec ; GFX6: S_ENDPGM 0, implicit %9 ; GFX9-LABEL: name: sub_s32 @@ -30,9 +29,9 @@ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX9: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_U32_]], 0, implicit $exec - ; GFX9: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_U32_]], [[V_SUB_U32_e64_]], 0, implicit $exec + ; GFX9: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX9: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_I32_]], [[V_SUB_U32_e64_]], 0, implicit $exec ; GFX9: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[COPY2]], 0, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_SUB_U32_e64_2]] ; GFX10-LABEL: name: sub_s32 @@ -41,9 +40,9 @@ ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX10: [[S_SUB_U32_:%[0-9]+]]:sreg_32 = S_SUB_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GFX10: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_U32_]], 0, implicit $exec - ; GFX10: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_U32_]], [[V_SUB_U32_e64_]], 0, implicit $exec + ; GFX10: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec + ; GFX10: [[V_SUB_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[S_SUB_I32_]], [[V_SUB_U32_e64_]], 0, implicit $exec ; GFX10: [[V_SUB_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[V_SUB_U32_e64_1]], [[COPY2]], 0, implicit $exec ; GFX10: S_ENDPGM 0, 
implicit [[V_SUB_U32_e64_2]] %0:sgpr(s32) = COPY $sgpr0 Index: llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll +++ llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll @@ -42,8 +42,8 @@ ; CI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16 ; CI: v_ashrrev_i32_e32 v{{[0-9]+}}, 16, [[LHS]] ; CI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} -; CI: v_ashrrev_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; CI: v_ashrrev_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; CI: v_ashr_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; CI: v_ashr_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} ; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} ; CI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} Index: llvm/test/CodeGen/AMDGPU/bfe-patterns.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/bfe-patterns.ll +++ llvm/test/CodeGen/AMDGPU/bfe-patterns.ll @@ -24,8 +24,11 @@ ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]] ; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]] -; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]] -; GCN-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]] +; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]] +; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]] + +; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]] +; VI-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]] ; GCN: [[BFE]] ; GCN: [[SHL]] @@ -97,8 +100,11 @@ ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]] ; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]] -; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]] -; GCN-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]] +; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]] +; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]] + +; VI-NEXT: 
v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]] +; VI-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]] ; GCN: [[BFE]] ; GCN: [[SHL]] Index: llvm/test/CodeGen/AMDGPU/commute-shifts.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/commute-shifts.ll +++ llvm/test/CodeGen/AMDGPU/commute-shifts.ll @@ -17,7 +17,7 @@ ; SI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm ; SI-NEXT: v_and_b32_e32 v0, 7, v0 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v0, v0, v2 +; SI-NEXT: v_lshr_b32_e32 v0, v2, v0 ; SI-NEXT: v_and_b32_e32 v0, 1, v0 ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; SI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc Index: llvm/test/CodeGen/AMDGPU/extract-lowbits.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/extract-lowbits.ll +++ llvm/test/CodeGen/AMDGPU/extract-lowbits.ll @@ -169,8 +169,8 @@ ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1 ; SI-NEXT: v_and_b32_e32 v1, 0xff, v1 -; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 -; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; SI-NEXT: v_lshl_b32_e32 v0, v0, v1 +; SI-NEXT: v_lshr_b32_e32 v0, v0, v1 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: bzhi32_d1_indexzext: Index: llvm/test/CodeGen/AMDGPU/inline-asm.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/inline-asm.ll +++ llvm/test/CodeGen/AMDGPU/inline-asm.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=PRE-GFX8 %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=GFX8 %s ; CHECK-LABEL: {{^}}inline_asm: ; CHECK: s_endpgm @@ -241,7 +241,8 @@ ; 
CHECK: ; def v0 ; CHECK: v_mov_b32_e32 v1, v0 ; CHECK: ; def v0 -; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1 +; PRE-GFX8: v_lshl_b32_e32 v{{[0-9]+}}, v1, v0 +; GFX8: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1 define amdgpu_kernel void @muliple_def_phys_vgpr() { entry: %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"() Index: llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll +++ llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s @@ -123,8 +123,8 @@ ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v3 ; CI-NEXT: v_and_b32_e32 v2, s8, v2 ; CI-NEXT: v_and_b32_e32 v3, s8, v3 -; CI-NEXT: v_lshrrev_b32_e32 v2, v3, v2 -; CI-NEXT: v_lshrrev_b32_e32 v3, v5, v4 +; CI-NEXT: v_lshr_b32_e32 v2, v2, v3 +; CI-NEXT: v_lshr_b32_e32 v3, v4, v5 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 @@ -490,10 +490,10 @@ ; CI-NEXT: v_and_b32_e32 v4, s8, v4 ; CI-NEXT: v_and_b32_e32 v3, s8, v3 ; CI-NEXT: v_and_b32_e32 v5, s8, v5 -; CI-NEXT: v_lshrrev_b32_e32 v3, v5, v3 -; CI-NEXT: v_lshrrev_b32_e32 v5, v9, v7 -; CI-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; CI-NEXT: v_lshrrev_b32_e32 v4, v8, v6 +; CI-NEXT: v_lshr_b32_e32 v3, v3,
v5 +; CI-NEXT: v_lshr_b32_e32 v5, v7, v9 +; CI-NEXT: v_lshr_b32_e32 v2, v2, v4 +; CI-NEXT: v_lshr_b32_e32 v4, v6, v8 ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; CI-NEXT: v_or_b32_e32 v3, v3, v5 Index: llvm/test/CodeGen/AMDGPU/sext-in-reg.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/sext-in-reg.ll +++ llvm/test/CodeGen/AMDGPU/sext-in-reg.ll @@ -577,7 +577,7 @@ ; GCN: {{buffer|flat|global}}_load_ushort [[VAL0:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_ushort [[VAL1:v[0-9]+]] -; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]] +; SI: v_lshl_b32_e32 [[REG:v[0-9]+]], [[VAL0]], [[VAL1]] ; GFX89: v_lshlrev_b16_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]] ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}} Index: llvm/test/CodeGen/AMDGPU/shift-select.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/shift-select.ll @@ -0,0 +1,68 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX6 %s +; RUN: llc -march=amdgcn -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s + +; GCN-LABEL: name: s_shl_i32 +; GCN: S_LSHL_B32 +define amdgpu_kernel void @s_shl_i32(i32 addrspace(1)* %out, i32 %lhs, i32 %rhs) { + %result = shl i32 %lhs, %rhs + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: v_shl_i32 +; GFX6: V_LSHL_B32_e32 +; GFX8: V_LSHLREV_B32_e32 +define amdgpu_kernel void @v_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %result = shl i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: 
s_lshr_i32 +; GCN: S_LSHR_B32 +define amdgpu_kernel void @s_lshr_i32(i32 addrspace(1)* %out, i32 %lhs, i32 %rhs) { + %result = lshr i32 %lhs, %rhs + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: v_lshr_i32 +; GFX6: V_LSHR_B32_e32 +; GFX8: V_LSHRREV_B32_e64 +define amdgpu_kernel void @v_lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %result = lshr i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: s_ashr_i32 +; GCN: S_ASHR_I32 +define amdgpu_kernel void @s_ashr_i32(i32 addrspace(1)* %out, i32 %lhs, i32 %rhs) { + %result = ashr i32 %lhs, %rhs + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: v_ashr_i32 +; GFX6: V_ASHR_I32_e32 +; GFX8: V_ASHRREV_I32_e64 +define amdgpu_kernel void @v_ashr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %result = ashr i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + + +declare i32 @llvm.amdgcn.workitem.id.x() Index: llvm/test/CodeGen/AMDGPU/shl.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/shl.ll +++ llvm/test/CodeGen/AMDGPU/shl.ll @@ -301,7 +301,7 @@ ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_add_i32_e32 v0, vcc, 3, v0 ; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GCN-NEXT: v_lshlrev_b32_e32 v0, v0, v2 +; GCN-NEXT: v_lshl_b32_e32 v0, v2, v0 ; GCN-NEXT: buffer_store_short v0, off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; @@ -425,8 +425,8 @@ ; GCN-NEXT: v_and_b32_e32 v0, s0, v0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GCN-NEXT: v_lshlrev_b32_e32 v0, v0, v1 -; GCN-NEXT: v_lshlrev_b32_e32 v1, v3, v2 +; GCN-NEXT: v_lshl_b32_e32 v0, v1, v0 +; GCN-NEXT: v_lshl_b32_e32 v1, v2, v3 ; GCN-NEXT: v_and_b32_e32 v0, s0, v0 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GCN-NEXT: v_or_b32_e32 v0, v0, v1 @@ -500,10 +500,10 @@ ; GCN-NEXT: v_and_b32_e32 v9, s8, v5 ; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v3 ; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v5 -; GCN-NEXT: v_lshlrev_b32_e32 v5, v5, v7 -; GCN-NEXT: v_lshlrev_b32_e32 v3, v9, v3 -; GCN-NEXT: v_lshlrev_b32_e32 v4, v4, v6 -; GCN-NEXT: v_lshlrev_b32_e32 v2, v8, v2 +; GCN-NEXT: v_lshl_b32_e32 v5, v7, v5 +; GCN-NEXT: v_lshl_b32_e32 v3, v3, v9 +; GCN-NEXT: v_lshl_b32_e32 v4, v6, v4 +; GCN-NEXT: v_lshl_b32_e32 v2, v2, v8 ; GCN-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; GCN-NEXT: v_and_b32_e32 v3, s8, v3 ; GCN-NEXT: v_lshlrev_b32_e32 v4, 16, v4 Index: llvm/test/CodeGen/AMDGPU/shl.v2i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/shl.v2i16.ll +++ llvm/test/CodeGen/AMDGPU/shl.v2i16.ll @@ -123,8 +123,8 @@ ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_and_b32_e32 v5, s8, v3 ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; CI-NEXT: v_lshlrev_b32_e32 v3, v3, v4 -; CI-NEXT: v_lshlrev_b32_e32 v2, v5, v2 +; CI-NEXT: v_lshl_b32_e32 v3, v4, v3 +; CI-NEXT: v_lshl_b32_e32 v2, v2, v5 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_and_b32_e32 v2, s8, v2 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 @@ -491,10 +491,10 @@ ; CI-NEXT: v_and_b32_e32 v9, s8, v5 ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v3 ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5 -; CI-NEXT: v_lshlrev_b32_e32 v5, v5, v7 -; CI-NEXT: v_lshlrev_b32_e32 v3, v9, v3 -; CI-NEXT: v_lshlrev_b32_e32 v4, v4, v6 -; CI-NEXT: v_lshlrev_b32_e32 v2, v8, v2 +; CI-NEXT: v_lshl_b32_e32 v5, v7, v5 +; CI-NEXT: v_lshl_b32_e32 v3, v3, v9 +; CI-NEXT: v_lshl_b32_e32 v4, v6, v4 +; CI-NEXT: v_lshl_b32_e32 v2, v2, v8 ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; CI-NEXT: v_and_b32_e32 v3, s8, v3 ; CI-NEXT: 
v_lshlrev_b32_e32 v4, 16, v4