diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -1136,14 +1136,18 @@ multiclass VOP3Only_Real_gfx10 op> { def _e64_gfx10 : VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX10>, - VOP3e_gfx10(NAME#"_e64").Pfl>; + VOP3e_gfx10(NAME#"_e64").Pfl> { + let IsSingle = 1; + } } //===---------------------------- VOP3beOnly ----------------------------===// multiclass VOP3beOnly_Real_gfx10 op> { def _e64_gfx10 : VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX10>, - VOP3be_gfx10(NAME#"_e64").Pfl>; + VOP3be_gfx10(NAME#"_e64").Pfl> { + let IsSingle = 1; + } } } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" @@ -1191,7 +1195,10 @@ defm V_MAX_F16 : VOP2_Real_gfx10<0x039>; defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>; defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>; + +let IsSingle = 1 in { defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; +} // VOP2 no carry-in, carry-out. defm V_ADD_NC_U32 : @@ -1684,7 +1691,9 @@ let SubtargetPredicate = isGFX90APlus in { defm V_FMAC_F64 : VOP2_Real_e32e64_gfx90a <0x4>; - defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>; + let IsSingle = 1 in { + defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>; + } } // End SubtargetPredicate = isGFX90APlus multiclass VOP2_Real_DOT_ACC_gfx9 op> : VOP2_Real_e32_vi { diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -811,6 +811,7 @@ VOP3e_gfx10(opName#"_e64").Pfl> { VOP3_Pseudo ps = !cast(opName#"_e64"); let AsmString = asmName # ps.AsmOperands; + let IsSingle = 1; } } multiclass VOP3be_Real_gfx10 op> { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll @@ -32,7 +32,7 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX10-NEXT: v_ashrrev_i16_e64 v0, v1, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = ashr i8 %value, %amount ret i8 %result @@ -65,7 +65,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX10-NEXT: v_ashrrev_i16_e64 v0, 7, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, 7, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = ashr i8 %value, 7 ret i8 %result @@ -595,7 +595,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_ashrrev_i16_e64 v0, v1, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = ashr i16 %value, %amount ret i16 %result @@ -684,7 +684,7 @@ ; ; GFX10-LABEL: ashr_i16_sv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_ashrrev_i16_e64 v0, v0, s0 +; GFX10-NEXT: v_ashrrev_i16 v0, v0, s0 ; GFX10-NEXT: ; return to shader part epilog %result = ashr i16 %value, %amount %cast = bitcast i16 %result to half @@ -711,7 +711,7 @@ ; ; GFX10-LABEL: ashr_i16_vs: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_ashrrev_i16_e64 v0, s0, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, s0, v0 ; GFX10-NEXT: ; return to shader part epilog %result = ashr i16 %value, %amount %cast = bitcast i16 %result to half diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll @@ -11,7 +11,7 @@ ; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000 ; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff ; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]] -; GFX10: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] +; GFX10: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] ; GFX10: v_mov_b32_e32 [[B]], 0x7fff ; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]] define i16 @v_clamp_i64_i16(i64 %in) #0 { @@ -28,7 +28,7 @@ ; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000 ; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff ; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]] -; GFX10: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] +; GFX10: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] ; GFX10: v_mov_b32_e32 [[B]], 0x7fff ; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]] define i16 @v_clamp_i64_i16_reverse(i64 %in) #0 { @@ -72,7 +72,7 @@ ; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01 ; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100 ; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]] -; GFX10: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] +; GFX10: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] ; GFX10: v_mov_b32_e32 [[B]], 0x100 ; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]] define i16 @v_clamp_i64_i16_lower_than_short(i64 %in) #0 { @@ -89,7 +89,7 @@ ; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01 ; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100 ; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]] -; GFX10: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] +; GFX10: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] ; GFX10: v_mov_b32_e32 [[B]], 0x100 ; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]] define i16 @v_clamp_i64_i16_lower_than_short_reverse(i64 %in) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -4175,7 +4175,7 @@ ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, s4 ; GFX10-NEXT: v_mov_b32_e32 v3, s5 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo ; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 ; GFX10-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -127,11 +127,11 @@ ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo -; GFX10-NEXT: v_sub_nc_u16_e64 v1, 6, v0 +; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0 ; GFX10-NEXT: v_and_b32_e32 v0, s3, v0 ; GFX10-NEXT: v_and_b32_e32 v1, s3, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v0, s0 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, v1, s1 +; GFX10-NEXT: v_lshlrev_b16 v0, v0, s0 +; GFX10-NEXT: v_lshrrev_b16 v1, v1, s1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog @@ -245,7 +245,7 @@ ; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2 ; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, 1, v1 +; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1 ; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 ; GFX10-NEXT: v_mul_lo_u32 v4, s4, v3 @@ -261,11 +261,11 @@ ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v3, 0x7f -; GFX10-NEXT: v_sub_nc_u16_e64 v4, 6, v2 +; GFX10-NEXT: v_sub_nc_u16 v4, 6, v2 ; GFX10-NEXT: v_and_b32_e32 v2, v2, v3 ; GFX10-NEXT: v_and_b32_e32 v3, v4, v3 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v2, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, v3, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, v3, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i7 @llvm.fshl.i7(i7 %lhs, i7 %rhs, i7 %amt) @@ -371,9 +371,9 @@ ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, 1, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v2, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, v3, v1 +; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, v3, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 %amt) @@ -452,8 +452,8 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 4, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, 4, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 4) @@ -532,8 +532,8 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 5, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, 3, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 5, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, 3, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 5) @@ -756,13 +756,13 @@ ; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX10-NEXT: v_and_b32_e32 v6, 7, v6 -; GFX10-NEXT: v_lshrrev_b16_e64 v4, 1, v4 +; GFX10-NEXT: v_lshrrev_b16 v4, 1, v4 ; GFX10-NEXT: v_and_b32_e32 v7, 7, v7 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, 1, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v3, v3, v5 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v2, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v4, v6, v4 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, v7, v1 +; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1 +; GFX10-NEXT: v_lshlrev_b16 v3, v3, v5 +; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0 +; GFX10-NEXT: v_lshrrev_b16 v4, v6, v4 +; GFX10-NEXT: v_lshrrev_b16 v1, v7, v1 ; GFX10-NEXT: v_or_b32_e32 v2, v3, v4 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: v_and_b32_sdwa v1, v2, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD @@ -1163,7 +1163,7 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v11, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, v11, v0 ; GFX10-NEXT: v_xor_b32_e32 v11, -1, v8 ; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v1 @@ -1174,25 +1174,25 @@ ; GFX10-NEXT: v_and_b32_e32 v12, s4, v1 ; GFX10-NEXT: v_and_b32_e32 v6, s4, v6 ; GFX10-NEXT: v_and_b32_sdwa v1, v1, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_lshlrev_b16_e64 v3, v8, v3 +; GFX10-NEXT: v_lshlrev_b16 v3, v8, v3 ; GFX10-NEXT: v_xor_b32_e32 v8, -1, v9 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX10-NEXT: v_and_b32_e32 v9, 7, v9 ; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 -; GFX10-NEXT: v_lshrrev_b16_e64 v6, 1, v6 +; GFX10-NEXT: v_lshrrev_b16 v6, 1, v6 ; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, 1, v1 +; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1 ; GFX10-NEXT: v_and_b32_e32 v13, 7, v13 -; GFX10-NEXT: v_lshrrev_b16_e64 v7, 1, v7 -; GFX10-NEXT: v_lshrrev_b16_e64 v6, v11, v6 -; GFX10-NEXT: v_lshlrev_b16_e64 v2, v2, v5 +; GFX10-NEXT: v_lshrrev_b16 v7, 1, v7 +; GFX10-NEXT: v_lshrrev_b16 v6, v11, v6 +; GFX10-NEXT: v_lshlrev_b16 v2, v2, v5 ; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 -; GFX10-NEXT: v_lshrrev_b16_e64 v12, 1, v12 -; GFX10-NEXT: v_lshrrev_b16_e64 v5, v13, v7 -; GFX10-NEXT: v_lshlrev_b16_e64 v4, v9, v4 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, v8, v1 +; GFX10-NEXT: v_lshrrev_b16 v12, 1, v12 +; GFX10-NEXT: v_lshrrev_b16 v5, v13, v7 +; GFX10-NEXT: v_lshlrev_b16 v4, v9, v4 +; GFX10-NEXT: v_lshrrev_b16 v1, v8, v1 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v6 -; GFX10-NEXT: v_lshrrev_b16_e64 v7, v10, v12 +; GFX10-NEXT: v_lshrrev_b16 v7, v10, v12 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v5 ; GFX10-NEXT: v_mov_b32_e32 v6, 8 ; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 @@ -2980,10 +2980,10 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX10-NEXT: v_and_b32_e32 v2, 15, v2 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, 1, v1 +; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1 ; GFX10-NEXT: v_and_b32_e32 v3, 15, v3 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v2, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, v3, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, v3, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt) @@ -3020,8 +3020,8 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 4, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, 12, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, 12, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 4) @@ -3058,8 +3058,8 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 5, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, 11, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 5, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, 11, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 5) @@ -3115,8 +3115,8 @@ ; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v1 ; GFX10-NEXT: s_lshr_b32 s1, s1, s2 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v0, s0 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, v1, s1 +; GFX10-NEXT: v_lshlrev_b16 v0, v0, s0 +; GFX10-NEXT: v_lshrrev_b16 v1, v1, s1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: ; return to shader part epilog %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt) @@ -3162,11 +3162,11 @@ ; ; GFX10-LABEL: v_fshl_i16_svs: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_lshrrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_lshrrev_b16 v0, 1, v0 ; GFX10-NEXT: s_andn2_b32 s2, 15, s1 ; GFX10-NEXT: s_and_b32 s1, s1, 15 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 -; GFX10-NEXT: v_lshrrev_b16_e64 v0, s2, v0 +; GFX10-NEXT: v_lshrrev_b16 v0, s2, v0 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 ; GFX10-NEXT: ; return to shader part epilog @@ -3221,7 +3221,7 @@ ; GFX10-NEXT: s_andn2_b32 s1, 15, s1 ; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000 ; GFX10-NEXT: s_bfe_u32 s3, 1, 0x100000 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, s2, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, s2, v0 ; GFX10-NEXT: s_lshr_b32 s0, s0, s3 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -124,11 +124,11 @@ ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo -; GFX10-NEXT: v_sub_nc_u16_e64 v1, 6, v0 +; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0 ; GFX10-NEXT: v_and_b32_e32 v0, s3, v0 ; GFX10-NEXT: v_and_b32_e32 v1, s3, v1 -; GFX10-NEXT: v_lshrrev_b16_e64 v0, v0, s1 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, v1, s0 +; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1 +; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0 ; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog @@ -240,7 +240,7 @@ ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 ; GFX10-NEXT: s_sub_i32 s4, 0, 7 ; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 ; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 @@ -258,11 +258,11 @@ ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v3, 0x7f -; GFX10-NEXT: v_sub_nc_u16_e64 v4, 6, v2 +; GFX10-NEXT: v_sub_nc_u16 v4, 6, v2 ; GFX10-NEXT: v_and_b32_e32 v2, v2, v3 ; GFX10-NEXT: v_and_b32_e32 v7, v4, v3 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, v2, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v7, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, v7, v0 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt) @@ -365,10 +365,10 @@ ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, v2, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v3, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt) @@ -447,8 +447,8 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 4, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, 4, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4) @@ -527,8 +527,8 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 3, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, 5, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 3, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5) @@ -749,15 +749,15 @@ ; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2 ; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 ; GFX10-NEXT: v_and_b32_e32 v5, s4, v5 -; GFX10-NEXT: v_lshlrev_b16_e64 v4, 1, v4 +; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 ; GFX10-NEXT: v_and_b32_e32 v6, 7, v6 ; GFX10-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 -; GFX10-NEXT: v_lshrrev_b16_e64 v3, v3, v5 -; GFX10-NEXT: v_lshlrev_b16_e64 v4, v6, v4 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, v7, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v2, v0 +; GFX10-NEXT: v_lshrrev_b16 v3, v3, v5 +; GFX10-NEXT: v_lshlrev_b16 v4, v6, v4 +; GFX10-NEXT: v_lshrrev_b16 v1, v7, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0 ; GFX10-NEXT: v_or_b32_e32 v2, v4, v3 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: v_and_b32_sdwa v1, v2, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD @@ -1164,14 +1164,14 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2 ; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v2 ; GFX10-NEXT: v_and_b32_e32 v15, 7, v8 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX10-NEXT: v_and_b32_e32 v14, 7, v11 -; GFX10-NEXT: v_lshlrev_b16_e64 v3, 1, v3 +; GFX10-NEXT: v_lshlrev_b16 v3, 1, v3 ; GFX10-NEXT: v_xor_b32_e32 v11, -1, v10 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v15, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, v15, v0 ; GFX10-NEXT: v_mov_b32_e32 v15, 0xff -; GFX10-NEXT: v_lshlrev_b16_e64 v3, v14, v3 +; GFX10-NEXT: v_lshlrev_b16 v3, v14, v3 ; GFX10-NEXT: v_xor_b32_e32 v14, -1, v12 ; GFX10-NEXT: s_movk_i32 s4, 0xff ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v1 @@ -1181,17 +1181,17 @@ ; GFX10-NEXT: v_and_b32_e32 v6, 7, v6 ; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 ; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 -; GFX10-NEXT: v_lshlrev_b16_e64 v4, 1, v4 +; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 ; GFX10-NEXT: v_and_b32_e32 v15, 7, v14 -; GFX10-NEXT: v_lshlrev_b16_e64 v5, 1, v5 +; GFX10-NEXT: v_lshlrev_b16 v5, 1, v5 ; GFX10-NEXT: v_and_b32_e32 v12, 7, v12 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 -; GFX10-NEXT: v_lshrrev_b16_e64 v6, v6, v7 -; GFX10-NEXT: v_lshlrev_b16_e64 v4, v11, v4 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, v10, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v5, v15, v5 -; GFX10-NEXT: v_lshrrev_b16_e64 v7, v12, v9 -; GFX10-NEXT: v_lshrrev_b16_e64 v2, v2, v8 +; GFX10-NEXT: v_lshrrev_b16 v6, v6, v7 +; GFX10-NEXT: v_lshlrev_b16 v4, v11, v4 +; GFX10-NEXT: v_lshrrev_b16 v1, v10, v1 +; GFX10-NEXT: v_lshlrev_b16 v5, v15, v5 +; GFX10-NEXT: v_lshrrev_b16 v7, v12, v9 +; GFX10-NEXT: v_lshrrev_b16 v2, v2, v8 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v6 ; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 ; GFX10-NEXT: v_mov_b32_e32 v6, 8 @@ -2814,10 +2814,10 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX10-NEXT: v_and_b32_e32 v2, 15, v2 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX10-NEXT: v_and_b32_e32 v3, 15, v3 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, v2, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v3, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) @@ -2854,8 +2854,8 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 12, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, 4, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 12, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4) @@ -2892,8 +2892,8 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 11, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, 5, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 11, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5) @@ -2946,8 +2946,8 @@ ; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v1 -; GFX10-NEXT: v_lshrrev_b16_e64 v0, v0, s1 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, v1, s0 +; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1 +; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0 ; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX10-NEXT: ; return to shader part epilog %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) @@ -2998,7 +2998,7 @@ ; GFX10-NEXT: s_and_b32 s2, s1, 15 ; GFX10-NEXT: s_andn2_b32 s1, 15, s1 ; GFX10-NEXT: s_bfe_u32 s3, 1, 0x100000 -; GFX10-NEXT: v_lshrrev_b16_e64 v0, s2, v0 +; GFX10-NEXT: v_lshrrev_b16 v0, s2, v0 ; GFX10-NEXT: s_lshl_b32 s0, s0, s3 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1 @@ -3049,12 +3049,12 @@ ; ; GFX10-LABEL: v_fshr_i16_vss: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX10-NEXT: s_andn2_b32 s2, 15, s1 ; GFX10-NEXT: s_and_b32 s1, s1, 15 ; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, s2, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, s2, v0 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 ; GFX10-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll @@ -83,13 +83,13 @@ ; GFX10-NEXT: global_load_dwordx4 v[40:43], v70, s[0:1] offset:32 ; GFX10-NEXT: global_load_dwordx4 v[44:47], v70, s[0:1] offset:48 ; GFX10-NEXT: global_load_dwordx4 v[48:51], v70, s[0:1] offset:64 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v5, v70 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v5, v70 ; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v6, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v64, vcc_lo, v0, 64 +; GFX10-NEXT: v_add_co_u32 v64, vcc_lo, v0, 64 ; GFX10-NEXT: v_add_co_ci_u32_e32 v65, vcc_lo, 0, v5, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v66, vcc_lo, v0, v1 +; GFX10-NEXT: v_add_co_u32 v66, vcc_lo, v0, v1 ; GFX10-NEXT: v_add_co_ci_u32_e32 v67, vcc_lo, v5, v2, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v68, vcc_lo, v0, v3 +; GFX10-NEXT: v_add_co_u32 v68, vcc_lo, v0, v3 ; GFX10-NEXT: v_add_co_ci_u32_e32 v69, vcc_lo, v5, v4, vcc_lo ; GFX10-NEXT: s_clause 0xa ; GFX10-NEXT: global_load_dwordx4 v[52:55], v[64:65], off offset:16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll @@ -1433,14 +1433,14 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, 20 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 20 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_atomic_inc v3, v[0:1], v3 glc ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: flat_store_dword v[0:1], v3 @@ -1505,10 +1505,10 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX10-NEXT: v_mov_b32_e32 v2, 42 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, 20 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 20 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_atomic_inc v0, v[0:1], v2 glc ; GFX10-NEXT: s_endpgm @@ -1830,14 +1830,14 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v8, s3 ; GFX10-NEXT: v_mov_b32_e32 v7, s2 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v7, v4 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v7, v4 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v8, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, 40 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 40 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, v4 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -1905,11 +1905,11 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX10-NEXT: v_mov_b32_e32 v2, 42 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, 40 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 40 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc ; GFX10-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll @@ -23,7 +23,7 @@ ; GCN-NEXT: s_mov_b32 s5, 0 ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_mov_b32_e32 v4, s5 -; GCN-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, v3 +; GCN-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3 ; GCN-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo ; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -54,7 +54,7 @@ ; GCN-NEXT: s_mov_b32 s5, 0 ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_mov_b32_e32 v4, s5 -; GCN-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, v3 +; GCN-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3 ; GCN-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo ; GCN-NEXT: global_atomic_csub v0, v[0:1], v2, off glc ; GCN-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll @@ -32,7 +32,7 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v0, v1, v0 +; GFX10-NEXT: v_lshrrev_b16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = lshr i8 %value, %amount ret i8 %result @@ -65,7 +65,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v0, 0xff, v0 -; GFX10-NEXT: v_lshrrev_b16_e64 v0, 7, v0 +; GFX10-NEXT: v_lshrrev_b16 v0, 7, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = lshr i8 %value, 7 ret i8 %result @@ -133,6 +133,16 @@ ; GCN-NEXT: v_and_b32_e32 v0, s4, v0 ; GCN-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_lshr_i24: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s4, 0xffffff +; GFX10-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX10-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] %result = lshr i24 %value, %amount ret i24 %result } @@ -594,7 +604,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshrrev_b16_e64 v0, v1, v0 +; GFX10-NEXT: v_lshrrev_b16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = lshr i16 %value, %amount ret i16 %result @@ -688,7 +698,7 @@ ; ; GFX10-LABEL: lshr_i16_sv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_lshrrev_b16_e64 v0, v0, s0 +; GFX10-NEXT: v_lshrrev_b16 v0, v0, s0 ; GFX10-NEXT: ; return to shader part epilog %result = lshr i16 %value, %amount %cast = bitcast i16 %result to half @@ -716,7 +726,7 @@ ; ; GFX10-LABEL: lshr_i16_vs: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_lshrrev_b16_e64 v0, s0, v0 +; GFX10-NEXT: v_lshrrev_b16 v0, s0, v0 ; GFX10-NEXT: ; return to shader part epilog %result = lshr i16 %value, %amount %cast = bitcast i16 %result to half diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll @@ -63,7 +63,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mul_lo_u16_e64 v0, v0, v1 +; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = mul i16 %num, %den ret i16 %result @@ -130,7 +130,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mul_lo_u16_e64 v0, v0, v1 +; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: v_bfe_u32 v0, v0, 0, 16 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = mul i16 %num, %den @@ -203,7 +203,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mul_lo_u16_e64 v0, v0, v1 +; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 ; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = mul i16 %num, %den @@ -570,6 +570,29 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, v6 ; GFX9-NEXT: v_mov_b32_e32 v1, v7 ; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_mul_i96: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mul_lo_u32 v6, v1, v3 +; GFX10-NEXT: v_mul_lo_u32 v7, v0, v4 +; GFX10-NEXT: v_mul_hi_u32 v8, v0, v3 +; GFX10-NEXT: v_mul_lo_u32 v9, v1, v4 +; GFX10-NEXT: v_mul_lo_u32 v2, v2, v3 +; GFX10-NEXT: v_mul_lo_u32 v5, v0, v5 +; GFX10-NEXT: v_mul_hi_u32 v4, v0, v4 +; GFX10-NEXT: v_mul_lo_u32 v0, v0, v3 +; GFX10-NEXT: v_add_co_u32 v6, s4, v6, v7 +; GFX10-NEXT: v_mul_hi_u32 v7, v1, v3 +; GFX10-NEXT: v_cndmask_b32_e64 v11, 0, 1, s4 +; GFX10-NEXT: v_add_nc_u32_e32 v2, v2, v9 +; GFX10-NEXT: v_add_co_u32 v1, s4, v6, v8 +; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s4 +; GFX10-NEXT: v_add3_u32 v2, v2, v5, v7 +; GFX10-NEXT: v_add_nc_u32_e32 v3, v11, v6 +; GFX10-NEXT: v_add3_u32 v2, v2, v4, v3 +; GFX10-NEXT: s_setpc_b64 s[30:31] %result = mul i96 %num, %den ret i96 %result } @@ -951,6 +974,50 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, v9 ; GFX9-NEXT: v_mov_b32_e32 v2, v10 ; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_mul_i128: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mul_lo_u32 v8, v2, v4 +; GFX10-NEXT: v_mul_lo_u32 v9, v1, v5 +; GFX10-NEXT: v_mul_lo_u32 v10, v1, v4 +; GFX10-NEXT: v_mul_lo_u32 v11, v0, v5 +; GFX10-NEXT: v_mul_hi_u32 v12, v0, v4 +; GFX10-NEXT: v_mul_lo_u32 v13, v0, v6 +; GFX10-NEXT: v_mul_hi_u32 v15, v0, v5 +; GFX10-NEXT: v_mul_lo_u32 v3, v3, v4 +; GFX10-NEXT: v_mul_lo_u32 v7, v0, v7 +; GFX10-NEXT: v_add_co_u32 v8, s4, v8, v9 +; GFX10-NEXT: v_add_co_u32 v9, s5, v10, v11 +; GFX10-NEXT: v_mul_hi_u32 v11, v1, v4 +; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s5 +; GFX10-NEXT: v_add_co_u32 v13, s4, v8, v13 +; GFX10-NEXT: v_add_co_u32 v8, s5, v9, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s5 +; GFX10-NEXT: v_add_co_u32 v18, s4, v13, v11 +; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s4 +; GFX10-NEXT: v_add_nc_u32_e32 v9, v10, v9 +; GFX10-NEXT: v_mul_lo_u32 v10, v2, v5 +; GFX10-NEXT: v_add_co_u32 v11, s4, v18, v15 +; GFX10-NEXT: v_mul_hi_u32 v15, v2, v4 +; GFX10-NEXT: v_add3_u32 v12, v14, v12, v13 +; GFX10-NEXT: v_mul_lo_u32 v13, v1, v6 +; GFX10-NEXT: v_mul_hi_u32 v1, v1, v5 +; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v2, s4, v11, v9 +; GFX10-NEXT: v_add_nc_u32_e32 v10, v3, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 +; GFX10-NEXT: v_mul_hi_u32 v6, v0, v6 +; GFX10-NEXT: v_mul_lo_u32 v0, v0, v4 +; GFX10-NEXT: v_add3_u32 v10, v10, v13, v7 +; GFX10-NEXT: v_add3_u32 v4, v12, v14, v5 +; GFX10-NEXT: v_add3_u32 v1, v10, v15, v1 +; GFX10-NEXT: v_add3_u32 v3, v1, v6, v4 +; GFX10-NEXT: v_mov_b32_e32 v1, v8 +; GFX10-NEXT: s_setpc_b64 s[30:31] %result = mul i128 %num, %den ret i128 %result } @@ -2674,6 +2741,204 @@ ; GFX9-NEXT: v_mov_b32_e32 v5, v20 ; GFX9-NEXT: v_mov_b32_e32 v6, v21 ; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_mul_i256: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_mul_lo_u32 v16, v1, v8 +; GFX10-NEXT: v_mul_lo_u32 v17, v0, v9 +; GFX10-NEXT: v_mul_hi_u32 v18, v0, v8 +; GFX10-NEXT: v_mul_lo_u32 v19, v2, v8 +; GFX10-NEXT: v_mul_lo_u32 v20, v1, v9 +; GFX10-NEXT: v_mul_hi_u32 v21, v1, v8 +; GFX10-NEXT: v_mul_lo_u32 v22, v3, v8 +; GFX10-NEXT: v_mul_lo_u32 v25, v1, v10 +; GFX10-NEXT: v_mul_hi_u32 v23, v0, v9 +; GFX10-NEXT: v_add_co_u32 v16, s4, v16, v17 +; GFX10-NEXT: v_mul_hi_u32 v27, v0, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v17, 0, 1, s4 +; GFX10-NEXT: v_mul_lo_u32 v7, v7, v8 +; GFX10-NEXT: v_mul_lo_u32 v15, v0, v15 +; GFX10-NEXT: v_add_co_u32 v16, s4, v16, v18 +; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v19, s4, v19, v20 +; GFX10-NEXT: v_mul_lo_u32 v20, v2, v9 +; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 +; GFX10-NEXT: v_add_nc_u32_e32 v17, v17, v18 +; GFX10-NEXT: v_mul_lo_u32 v18, v0, v10 +; GFX10-NEXT: v_add_co_u32 v18, s4, v19, v18 +; GFX10-NEXT: v_cndmask_b32_e64 v19, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v20, s4, v22, v20 +; GFX10-NEXT: v_mul_lo_u32 v22, v0, v11 +; GFX10-NEXT: v_add_co_u32 v18, s5, v18, v21 +; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, 1, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v21, 0, 1, s5 +; GFX10-NEXT: v_add_co_u32 v20, s4, v20, v25 +; GFX10-NEXT: v_add_co_u32 v18, s5, v18, v23 +; GFX10-NEXT: v_mul_hi_u32 v23, v1, v9 +; GFX10-NEXT: v_add3_u32 v19, v24, v19, v21 +; GFX10-NEXT: v_mul_hi_u32 v21, v2, v8 +; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v20, s4, v20, v22 +; GFX10-NEXT: v_cndmask_b32_e64 v29, 0, 1, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v17, s5, v18, v17 +; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, 1, s5 +; GFX10-NEXT: v_add_co_u32 v20, s4, v20, v21 +; GFX10-NEXT: v_add3_u32 v21, v26, v24, v25 +; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 +; GFX10-NEXT: v_mul_lo_u32 v25, v4, v8 +; GFX10-NEXT: v_mul_lo_u32 v26, v3, v9 +; GFX10-NEXT: v_add_co_u32 v20, s4, v20, v23 +; GFX10-NEXT: v_add3_u32 v18, v19, v29, v18 +; GFX10-NEXT: v_cndmask_b32_e64 v23, 0, 1, s4 +; GFX10-NEXT: v_mul_hi_u32 v29, v3, v9 +; GFX10-NEXT: v_add_co_u32 v20, s5, v20, v27 +; GFX10-NEXT: v_add3_u32 v30, v21, v24, v23 +; GFX10-NEXT: v_mul_lo_u32 v21, v2, v10 +; GFX10-NEXT: v_add_co_u32 v22, s4, v25, v26 +; GFX10-NEXT: v_mul_lo_u32 v24, v1, v11 +; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 +; GFX10-NEXT: v_mul_hi_u32 v26, v3, v8 +; GFX10-NEXT: v_cndmask_b32_e64 v23, 0, 1, s5 +; GFX10-NEXT: v_add_co_u32 v21, s4, v22, v21 +; GFX10-NEXT: v_mul_lo_u32 v22, v0, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v27, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v21, s4, v21, v24 +; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v18, s4, v20, v18 +; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v21, s4, v21, v22 +; GFX10-NEXT: v_mul_hi_u32 v22, v2, v9 +; GFX10-NEXT: v_add3_u32 v24, v25, v27, v24 +; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 +; GFX10-NEXT: v_add3_u32 v19, v30, v23, v20 +; GFX10-NEXT: v_add_co_u32 v21, s4, v21, v26 +; GFX10-NEXT: v_mul_hi_u32 v20, v1, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, 1, s4 +; GFX10-NEXT: v_mul_hi_u32 v27, v0, v11 +; GFX10-NEXT: v_add_co_u32 v21, s4, v21, v22 +; GFX10-NEXT: v_mul_lo_u32 v22, v5, v8 +; GFX10-NEXT: v_add3_u32 v23, v24, v25, v26 +; GFX10-NEXT: v_mul_lo_u32 v24, v4, v9 +; GFX10-NEXT: v_cndmask_b32_e64 v30, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v20, s4, v21, v20 +; GFX10-NEXT: v_mul_lo_u32 v26, v3, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v21, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v20, s5, v20, v27 +; GFX10-NEXT: v_add_co_u32 v31, s4, v22, v24 +; GFX10-NEXT: v_add3_u32 v35, v23, v30, v21 +; GFX10-NEXT: v_mul_lo_u32 v23, v2, v11 +; GFX10-NEXT: v_cndmask_b32_e64 v34, 0, 1, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s5 +; GFX10-NEXT: v_add_co_u32 v22, s4, v31, v26 +; GFX10-NEXT: v_mul_lo_u32 v26, v1, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v27, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v19, s5, v20, v19 +; GFX10-NEXT: v_add_co_u32 v31, s4, v22, v23 +; GFX10-NEXT: v_mul_lo_u32 v23, v0, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v28, 0, 1, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, 1, s5 +; GFX10-NEXT: v_add_co_u32 v22, s4, v31, v26 +; GFX10-NEXT: v_mul_hi_u32 v26, v4, v8 +; GFX10-NEXT: v_cndmask_b32_e64 v30, 0, 1, s4 +; GFX10-NEXT: v_add3_u32 v20, v35, v25, v20 +; GFX10-NEXT: v_add_co_u32 v31, s4, v22, v23 +; GFX10-NEXT: v_add3_u32 v23, v34, v27, v28 +; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 +; GFX10-NEXT: v_mul_hi_u32 v22, v2, v10 +; GFX10-NEXT: v_mul_lo_u32 v28, v5, v9 +; GFX10-NEXT: v_add_co_u32 v27, s4, v31, v26 +; GFX10-NEXT: v_mul_hi_u32 v26, v1, v11 +; GFX10-NEXT: v_add3_u32 v23, v23, v30, v24 +; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v21, s4, v27, v29 +; GFX10-NEXT: v_mul_lo_u32 v27, v6, v8 +; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 +; GFX10-NEXT: v_mul_hi_u32 v29, v0, v12 +; GFX10-NEXT: v_add_co_u32 v21, s4, v21, v22 +; GFX10-NEXT: v_add3_u32 v23, v23, v24, v25 +; GFX10-NEXT: v_mul_lo_u32 v24, v4, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v33, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v25, s4, v27, v28 +; GFX10-NEXT: v_add_co_u32 v31, s5, v21, v26 +; GFX10-NEXT: v_mul_lo_u32 v27, v3, v11 +; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, 1, s5 +; GFX10-NEXT: v_cndmask_b32_e64 v28, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v24, s4, v25, v24 +; GFX10-NEXT: v_add_co_u32 v21, s5, v31, v29 +; GFX10-NEXT: v_add3_u32 v39, v23, v33, v26 +; GFX10-NEXT: v_mul_lo_u32 v23, v2, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v35, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v24, s4, v24, v27 +; GFX10-NEXT: v_mul_lo_u32 v27, v1, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v29, 0, 1, s4 +; GFX10-NEXT: v_mul_hi_u32 v31, v4, v9 +; GFX10-NEXT: v_mul_hi_u32 v25, v3, v10 +; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, 1, s5 +; GFX10-NEXT: v_add_co_u32 v34, s4, v24, v23 +; GFX10-NEXT: v_mul_lo_u32 v24, v0, v14 +; GFX10-NEXT: v_cndmask_b32_e64 v30, 0, 1, s4 +; GFX10-NEXT: v_add3_u32 v35, v28, v35, v29 +; GFX10-NEXT: v_add_co_u32 v20, s5, v21, v20 +; GFX10-NEXT: v_add_co_u32 v23, s4, v34, v27 +; GFX10-NEXT: v_mul_hi_u32 v27, v5, v8 +; GFX10-NEXT: v_cndmask_b32_e64 v32, 0, 1, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v21, 0, 1, s5 +; GFX10-NEXT: v_mul_lo_u32 v29, v3, v12 +; GFX10-NEXT: v_add_co_u32 v34, s4, v23, v24 +; GFX10-NEXT: v_mul_hi_u32 v3, v3, v11 +; GFX10-NEXT: v_cndmask_b32_e64 v28, 0, 1, s4 +; GFX10-NEXT: v_add3_u32 v22, v35, v30, v32 +; GFX10-NEXT: v_add3_u32 v21, v39, v26, v21 +; GFX10-NEXT: v_add_co_u32 v34, s4, v34, v27 +; GFX10-NEXT: v_mul_hi_u32 v26, v2, v11 +; GFX10-NEXT: v_cndmask_b32_e64 v27, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v23, s4, v34, v31 +; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, 1, s4 +; GFX10-NEXT: v_add3_u32 v22, v22, v28, v27 +; GFX10-NEXT: v_mul_lo_u32 v28, v6, v9 +; GFX10-NEXT: v_add_co_u32 v23, s4, v23, v25 +; GFX10-NEXT: v_mul_hi_u32 v27, v1, v12 +; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 +; GFX10-NEXT: v_mul_hi_u32 v6, v6, v8 +; GFX10-NEXT: v_add_co_u32 v30, s4, v23, v26 +; GFX10-NEXT: v_add3_u32 v33, v22, v24, v25 +; GFX10-NEXT: v_mul_lo_u32 v24, v5, v10 +; GFX10-NEXT: v_mul_lo_u32 v25, v4, v11 +; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v28 +; GFX10-NEXT: v_mul_lo_u32 v28, v2, v13 +; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v23, s4, v30, v27 +; GFX10-NEXT: v_mul_hi_u32 v5, v5, v9 +; GFX10-NEXT: v_cndmask_b32_e64 v27, 0, 1, s4 +; GFX10-NEXT: v_mul_hi_u32 v4, v4, v10 +; GFX10-NEXT: v_add3_u32 v7, v7, v24, v25 +; GFX10-NEXT: v_mul_lo_u32 v24, v1, v14 +; GFX10-NEXT: v_mul_hi_u32 v25, v0, v13 +; GFX10-NEXT: v_add3_u32 v33, v33, v26, v27 +; GFX10-NEXT: v_mul_hi_u32 v2, v2, v12 +; GFX10-NEXT: v_add3_u32 v26, v7, v29, v28 +; GFX10-NEXT: v_mul_hi_u32 v1, v1, v13 +; GFX10-NEXT: v_add3_u32 v7, v26, v24, v15 +; GFX10-NEXT: v_add_co_u32 v11, s4, v23, v25 +; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s4 +; GFX10-NEXT: v_add3_u32 v5, v7, v6, v5 +; GFX10-NEXT: v_add_co_u32 v6, s4, v11, v21 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s4 +; GFX10-NEXT: v_add3_u32 v3, v5, v4, v3 +; GFX10-NEXT: v_mul_hi_u32 v4, v0, v14 +; GFX10-NEXT: v_mul_lo_u32 v0, v0, v8 +; GFX10-NEXT: v_add3_u32 v5, v33, v10, v7 +; GFX10-NEXT: v_add3_u32 v3, v3, v2, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, v16 +; GFX10-NEXT: v_mov_b32_e32 v2, v17 +; GFX10-NEXT: v_add3_u32 v7, v3, v4, v5 +; GFX10-NEXT: v_mov_b32_e32 v3, v18 +; GFX10-NEXT: v_mov_b32_e32 v4, v19 +; GFX10-NEXT: v_mov_b32_e32 v5, v20 +; GFX10-NEXT: s_setpc_b64 s[30:31] %result = mul i256 %num, %den ret i256 %result } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -48,10 +48,10 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 9, v0 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, 9, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 9, v0 +; GFX10-NEXT: v_lshlrev_b16 v1, 9, v1 ; GFX10-NEXT: v_add_nc_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_ashrrev_i16_e64 v0, 9, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, 9, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i7 @llvm.sadd.sat.i7(i7 %lhs, i7 %rhs) ret i7 %result @@ -111,7 +111,7 @@ ; GFX10-NEXT: s_lshl_b32 s0, s0, s2 ; GFX10-NEXT: s_lshl_b32 s1, s1, s2 ; GFX10-NEXT: v_add_nc_i16 v0, s0, s1 clamp -; GFX10-NEXT: v_ashrrev_i16_e64 v0, 9, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, 9, v0 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog %result = call i7 @llvm.sadd.sat.i7(i7 %lhs, i7 %rhs) @@ -162,10 +162,10 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 8, v0 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, 8, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 8, v0 +; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-NEXT: v_add_nc_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_ashrrev_i16_e64 v0, 8, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, 8, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.sadd.sat.i8(i8 %lhs, i8 %rhs) ret i8 %result @@ -225,7 +225,7 @@ ; GFX10-NEXT: s_lshl_b32 s0, s0, s2 ; GFX10-NEXT: s_lshl_b32 s1, s1, s2 ; GFX10-NEXT: v_add_nc_i16 v0, s0, s1 clamp -; GFX10-NEXT: v_ashrrev_i16_e64 v0, 8, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, 8, v0 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog %result = call i8 @llvm.sadd.sat.i8(i8 %lhs, i8 %rhs) @@ -4199,12 +4199,12 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v10, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_u32 v10, vcc_lo, v0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_gt_i64_e32 vcc_lo, 0, v[2:3] ; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v11 ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, v[10:11], v[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v0, s5, v6, 0 +; GFX10-NEXT: v_add_co_u32 v0, s5, v6, 0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s5, 0x80000000, v6, s5 ; GFX10-NEXT: s_xor_b32 vcc_lo, vcc_lo, s4 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc_lo @@ -4377,12 +4377,12 @@ ; ; GFX10-LABEL: saddsat_i64_sv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, s0, v0 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, s0, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_cmp_gt_i64_e32 vcc_lo, 0, v[0:1] ; GFX10-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; GFX10-NEXT: v_cmp_gt_i64_e64 s0, s[0:1], v[2:3] -; GFX10-NEXT: v_add_co_u32_e64 v0, s1, v4, 0 +; GFX10-NEXT: v_add_co_u32 v0, s1, v4, 0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s1, 0x80000000, v4, s1 ; GFX10-NEXT: s_xor_b32 vcc_lo, vcc_lo, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo @@ -4444,12 +4444,12 @@ ; ; GFX10-LABEL: saddsat_i64_vs: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v0, s0 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, s0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_cmp_lt_i64_e64 s1, s[0:1], 0 ; GFX10-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[2:3], v[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, v4, 0 +; GFX10-NEXT: v_add_co_u32 v0, s0, v4, 0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0x80000000, v4, s0 ; GFX10-NEXT: s_xor_b32 vcc_lo, s1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo @@ -4548,18 +4548,18 @@ ; GFX10-NEXT: v_mov_b32_e32 v17, v2 ; GFX10-NEXT: v_mov_b32_e32 v18, v3 ; GFX10-NEXT: v_cmp_gt_i64_e64 s4, 0, v[4:5] -; GFX10-NEXT: v_add_co_u32_e64 v8, vcc_lo, v14, v4 +; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v14, v4 ; GFX10-NEXT: v_cmp_gt_i64_e64 s6, 0, v[6:7] ; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v15, v5, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v19, vcc_lo, v17, v6 +; GFX10-NEXT: v_add_co_u32 v19, vcc_lo, v17, v6 ; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, v18, v7, vcc_lo ; GFX10-NEXT: v_ashrrev_i32_e32 v12, 31, v9 ; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[14:15] ; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v20 -; GFX10-NEXT: v_add_co_u32_e64 v1, s5, v12, 0 +; GFX10-NEXT: v_add_co_u32 v1, s5, v12, 0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s5, 0x80000000, v12, s5 ; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[19:20], v[17:18] -; GFX10-NEXT: v_add_co_u32_e64 v2, s7, v0, 0 +; GFX10-NEXT: v_add_co_u32 v2, s7, v0, 0 ; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, 0x80000000, v0, s7 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc_lo @@ -5320,7 +5320,7 @@ ; ; GFX10-LABEL: saddsat_i128_sv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v4, vcc_lo, s0, v0 +; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, s0, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, s2, v2, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, s3, v3, vcc_lo @@ -5362,7 +5362,7 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v0, v15, v0, s0 ; GFX10-NEXT: v_and_b32_e32 v8, 1, v9 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v15, v1, s0 -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, 0 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v8 ; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v0, vcc_lo @@ -5574,7 +5574,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v9, v2 ; GFX10-NEXT: v_mov_b32_e32 v10, v3 ; GFX10-NEXT: s_cmp_eq_u64 s[2:3], 0 -; GFX10-NEXT: v_add_co_u32_e64 v15, vcc_lo, v5, s0 +; GFX10-NEXT: v_add_co_u32 v15, vcc_lo, v5, s0 ; GFX10-NEXT: v_cmp_lt_u64_e64 s0, s[0:1], 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, s1, v6, vcc_lo ; GFX10-NEXT: s_cselect_b32 s4, 1, 0 @@ -5618,7 +5618,7 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v0, v7, v0, s0 ; GFX10-NEXT: v_and_b32_e32 v8, 1, v9 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v7, v1, s0 -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, 0 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v8 ; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v0, vcc_lo @@ -5964,7 +5964,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v20, v2 ; GFX10-NEXT: v_mov_b32_e32 v21, v3 ; GFX10-NEXT: s_movk_i32 s5, 0x7f -; GFX10-NEXT: v_add_co_u32_e64 v16, vcc_lo, v22, v8 +; GFX10-NEXT: v_add_co_u32 v16, vcc_lo, v22, v8 ; GFX10-NEXT: s_sub_i32 s6, 64, s5 ; GFX10-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, v23, v9, vcc_lo ; GFX10-NEXT: s_sub_i32 s7, s5, 64 @@ -6010,12 +6010,12 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v0, v11, v0, s4 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v11, v1, s4 ; GFX10-NEXT: v_and_b32_e32 v8, 1, v9 -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, 0 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v0, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 -; GFX10-NEXT: v_add_co_u32_e64 v8, s4, v26, v12 +; GFX10-NEXT: v_add_co_u32 v8, s4, v26, v12 ; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s4, v27, v13, s4 ; GFX10-NEXT: v_add_co_ci_u32_e64 v10, s4, v24, v14, s4 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v2, vcc_lo @@ -6056,7 +6056,7 @@ ; GFX10-NEXT: v_and_b32_e32 v7, 1, v7 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v18, v3, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v18, v4, s5 -; GFX10-NEXT: v_add_co_u32_e64 v5, s4, v5, 0 +; GFX10-NEXT: v_add_co_u32 v5, s4, v5, 0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s4, 0, v6, s4 ; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 0, v7 ; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s4, 0, v3, s4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll @@ -264,7 +264,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v5, s1 ; GFX10-NEXT: v_mov_b32_e32 v4, s0 -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v4, v2 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v5, v3, vcc_lo ; GFX10-NEXT: global_store_dword v[2:3], v1, off ; GFX10-NEXT: s_endpgm @@ -599,7 +599,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v0, 0x3fff, v0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 2, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 2, v0 ; GFX10-NEXT: v_bfe_u32 v0, v0, 0, 16 ; GFX10-NEXT: s_setpc_b64 s[30:31] %and = and i16 %x, 16383 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll @@ -29,7 +29,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v1, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = shl i8 %value, %amount ret i8 %result @@ -58,7 +58,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 7, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 7, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = shl i8 %value, 7 ret i8 %result @@ -592,7 +592,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v1, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = shl i16 %value, %amount ret i16 %result @@ -693,7 +693,7 @@ ; ; GFX10-LABEL: shl_i16_sv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v0, s0 +; GFX10-NEXT: v_lshlrev_b16 v0, v0, s0 ; GFX10-NEXT: ; return to shader part epilog %result = shl i16 %value, %amount %cast = bitcast i16 %result to half @@ -719,7 +719,7 @@ ; ; GFX10-LABEL: shl_i16_vs: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_lshlrev_b16_e64 v0, s0, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, s0, v0 ; GFX10-NEXT: ; return to shader part epilog %result = shl i16 %value, %amount %cast = bitcast i16 %result to half diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -48,10 +48,10 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 9, v0 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, 9, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 9, v0 +; GFX10-NEXT: v_lshlrev_b16 v1, 9, v1 ; GFX10-NEXT: v_sub_nc_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_ashrrev_i16_e64 v0, 9, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, 9, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i7 @llvm.ssub.sat.i7(i7 %lhs, i7 %rhs) ret i7 %result @@ -111,7 +111,7 @@ ; GFX10-NEXT: s_lshl_b32 s0, s0, s2 ; GFX10-NEXT: s_lshl_b32 s1, s1, s2 ; GFX10-NEXT: v_sub_nc_i16 v0, s0, s1 clamp -; GFX10-NEXT: v_ashrrev_i16_e64 v0, 9, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, 9, v0 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog %result = call i7 @llvm.ssub.sat.i7(i7 %lhs, i7 %rhs) @@ -162,10 +162,10 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 8, v0 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, 8, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 8, v0 +; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-NEXT: v_sub_nc_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_ashrrev_i16_e64 v0, 8, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, 8, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.ssub.sat.i8(i8 %lhs, i8 %rhs) ret i8 %result @@ -225,7 +225,7 @@ ; GFX10-NEXT: s_lshl_b32 s0, s0, s2 ; GFX10-NEXT: s_lshl_b32 s1, s1, s2 ; GFX10-NEXT: v_sub_nc_i16 v0, s0, s1 clamp -; GFX10-NEXT: v_ashrrev_i16_e64 v0, 8, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, 8, v0 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog %result = call i8 @llvm.ssub.sat.i8(i8 %lhs, i8 %rhs) @@ -4185,12 +4185,12 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_sub_co_u32_e64 v10, vcc_lo, v0, v2 +; GFX10-NEXT: v_sub_co_u32 v10, vcc_lo, v0, v2 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v11, vcc_lo, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, 0, v[2:3] ; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v11 ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, v[10:11], v[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v0, s5, v6, 0 +; GFX10-NEXT: v_add_co_u32 v0, s5, v6, 0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s5, 0x80000000, v6, s5 ; GFX10-NEXT: s_xor_b32 vcc_lo, vcc_lo, s4 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc_lo @@ -4363,12 +4363,12 @@ ; ; GFX10-LABEL: ssubsat_i64_sv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_sub_co_u32_e64 v2, vcc_lo, s0, v0 +; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, s0, v0 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, 0, v[0:1] ; GFX10-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; GFX10-NEXT: v_cmp_gt_i64_e64 s0, s[0:1], v[2:3] -; GFX10-NEXT: v_add_co_u32_e64 v0, s1, v4, 0 +; GFX10-NEXT: v_add_co_u32 v0, s1, v4, 0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s1, 0x80000000, v4, s1 ; GFX10-NEXT: s_xor_b32 vcc_lo, vcc_lo, s0 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo @@ -4430,12 +4430,12 @@ ; ; GFX10-LABEL: ssubsat_i64_vs: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_sub_co_u32_e64 v2, vcc_lo, v0, s0 +; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v0, s0 ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_cmp_gt_i64_e64 s1, s[0:1], 0 ; GFX10-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[2:3], v[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, v4, 0 +; GFX10-NEXT: v_add_co_u32 v0, s0, v4, 0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0x80000000, v4, s0 ; GFX10-NEXT: s_xor_b32 vcc_lo, s1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo @@ -4534,18 +4534,18 @@ ; GFX10-NEXT: v_mov_b32_e32 v17, v2 ; GFX10-NEXT: v_mov_b32_e32 v18, v3 ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[4:5] -; GFX10-NEXT: v_sub_co_u32_e64 v8, vcc_lo, v14, v4 +; GFX10-NEXT: v_sub_co_u32 v8, vcc_lo, v14, v4 ; GFX10-NEXT: v_cmp_lt_i64_e64 s6, 0, v[6:7] ; GFX10-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, v15, v5, vcc_lo -; GFX10-NEXT: v_sub_co_u32_e64 v19, vcc_lo, v17, v6 +; GFX10-NEXT: v_sub_co_u32 v19, vcc_lo, v17, v6 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v20, vcc_lo, v18, v7, vcc_lo ; GFX10-NEXT: v_ashrrev_i32_e32 v12, 31, v9 ; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[8:9], v[14:15] ; GFX10-NEXT: v_ashrrev_i32_e32 v0, 31, v20 -; GFX10-NEXT: v_add_co_u32_e64 v1, s5, v12, 0 +; GFX10-NEXT: v_add_co_u32 v1, s5, v12, 0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s5, 0x80000000, v12, s5 ; GFX10-NEXT: v_cmp_lt_i64_e64 s5, v[19:20], v[17:18] -; GFX10-NEXT: v_add_co_u32_e64 v2, s7, v0, 0 +; GFX10-NEXT: v_add_co_u32 v2, s7, v0, 0 ; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s7, 0x80000000, v0, s7 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc_lo @@ -5306,7 +5306,7 @@ ; ; GFX10-LABEL: ssubsat_i128_sv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_sub_co_u32_e64 v4, vcc_lo, s0, v0 +; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, s0, v0 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_sub_co_ci_u32_e32 v6, vcc_lo, s2, v2, vcc_lo ; GFX10-NEXT: v_sub_co_ci_u32_e32 v7, vcc_lo, s3, v3, vcc_lo @@ -5348,7 +5348,7 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v0, v15, v0, s0 ; GFX10-NEXT: v_and_b32_e32 v8, 1, v9 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v15, v1, s0 -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, 0 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v8 ; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v0, vcc_lo @@ -5560,7 +5560,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v9, v2 ; GFX10-NEXT: v_mov_b32_e32 v10, v3 ; GFX10-NEXT: s_cmp_eq_u64 s[2:3], 0 -; GFX10-NEXT: v_sub_co_u32_e64 v15, vcc_lo, v5, s0 +; GFX10-NEXT: v_sub_co_u32 v15, vcc_lo, v5, s0 ; GFX10-NEXT: v_cmp_gt_u64_e64 s0, s[0:1], 0 ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v16, vcc_lo, s1, v6, vcc_lo ; GFX10-NEXT: s_cselect_b32 s4, 1, 0 @@ -5604,7 +5604,7 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v0, v7, v0, s0 ; GFX10-NEXT: v_and_b32_e32 v8, 1, v9 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v7, v1, s0 -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, 0 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v8 ; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v0, vcc_lo @@ -5950,7 +5950,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v20, v2 ; GFX10-NEXT: v_mov_b32_e32 v21, v3 ; GFX10-NEXT: s_movk_i32 s5, 0x7f -; GFX10-NEXT: v_sub_co_u32_e64 v16, vcc_lo, v22, v8 +; GFX10-NEXT: v_sub_co_u32 v16, vcc_lo, v22, v8 ; GFX10-NEXT: s_sub_i32 s6, 64, s5 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v17, vcc_lo, v23, v9, vcc_lo ; GFX10-NEXT: s_sub_i32 s7, s5, 64 @@ -5996,12 +5996,12 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v0, v11, v0, s4 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v11, v1, s4 ; GFX10-NEXT: v_and_b32_e32 v8, 1, v9 -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v2, 0 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v0, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v8 -; GFX10-NEXT: v_sub_co_u32_e64 v8, s4, v26, v12 +; GFX10-NEXT: v_sub_co_u32 v8, s4, v26, v12 ; GFX10-NEXT: v_sub_co_ci_u32_e64 v9, s4, v27, v13, s4 ; GFX10-NEXT: v_sub_co_ci_u32_e64 v10, s4, v24, v14, s4 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v16, v2, vcc_lo @@ -6042,7 +6042,7 @@ ; GFX10-NEXT: v_and_b32_e32 v7, 1, v7 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v18, v3, s5 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v18, v4, s5 -; GFX10-NEXT: v_add_co_u32_e64 v5, s4, v5, 0 +; GFX10-NEXT: v_add_co_u32 v5, s4, v5, 0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s4, 0, v6, s4 ; GFX10-NEXT: v_cmp_ne_u32_e64 s5, 0, v7 ; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s4, 0, v3, s4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll @@ -38,10 +38,10 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 9, v0 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, 9, v1 -; GFX10-NEXT: v_add_nc_u16_e64 v0, v0, v1 clamp -; GFX10-NEXT: v_lshrrev_b16_e64 v0, 9, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 9, v0 +; GFX10-NEXT: v_lshlrev_b16 v1, 9, v1 +; GFX10-NEXT: v_add_nc_u16 v0, v0, v1 clamp +; GFX10-NEXT: v_lshrrev_b16 v0, 9, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i7 @llvm.uadd.sat.i7(i7 %lhs, i7 %rhs) ret i7 %result @@ -85,8 +85,8 @@ ; GFX10-NEXT: s_bfe_u32 s2, 9, 0x100000 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2 ; GFX10-NEXT: s_lshl_b32 s1, s1, s2 -; GFX10-NEXT: v_add_nc_u16_e64 v0, s0, s1 clamp -; GFX10-NEXT: v_lshrrev_b16_e64 v0, 9, v0 +; GFX10-NEXT: v_add_nc_u16 v0, s0, s1 clamp +; GFX10-NEXT: v_lshrrev_b16 v0, 9, v0 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog %result = call i7 @llvm.uadd.sat.i7(i7 %lhs, i7 %rhs) @@ -127,10 +127,10 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 8, v0 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, 8, v1 -; GFX10-NEXT: v_add_nc_u16_e64 v0, v0, v1 clamp -; GFX10-NEXT: v_lshrrev_b16_e64 v0, 8, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 8, v0 +; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 +; GFX10-NEXT: v_add_nc_u16 v0, v0, v1 clamp +; GFX10-NEXT: v_lshrrev_b16 v0, 8, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.uadd.sat.i8(i8 %lhs, i8 %rhs) ret i8 %result @@ -174,8 +174,8 @@ ; GFX10-NEXT: s_bfe_u32 s2, 8, 0x100000 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2 ; GFX10-NEXT: s_lshl_b32 s1, s1, s2 -; GFX10-NEXT: v_add_nc_u16_e64 v0, s0, s1 clamp -; GFX10-NEXT: v_lshrrev_b16_e64 v0, 8, v0 +; GFX10-NEXT: v_add_nc_u16 v0, s0, s1 clamp +; GFX10-NEXT: v_lshrrev_b16 v0, 8, v0 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog %result = call i8 @llvm.uadd.sat.i8(i8 %lhs, i8 %rhs) @@ -1644,7 +1644,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_nc_u16_e64 v0, v0, v1 clamp +; GFX10-NEXT: v_add_nc_u16 v0, v0, v1 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs) ret i16 %result @@ -1677,7 +1677,7 @@ ; ; GFX10-LABEL: s_uaddsat_i16: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_nc_u16_e64 v0, s0, s1 clamp +; GFX10-NEXT: v_add_nc_u16 v0, s0, s1 clamp ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog %result = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs) @@ -1707,7 +1707,7 @@ ; ; GFX10-LABEL: uaddsat_i16_sv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_nc_u16_e64 v0, s0, v0 clamp +; GFX10-NEXT: v_add_nc_u16 v0, s0, v0 clamp ; GFX10-NEXT: ; return to shader part epilog %result = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs) %cast = bitcast i16 %result to half @@ -1737,7 +1737,7 @@ ; ; GFX10-LABEL: uaddsat_i16_vs: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_nc_u16_e64 v0, v0, s0 clamp +; GFX10-NEXT: v_add_nc_u16 v0, v0, s0 clamp ; GFX10-NEXT: ; return to shader part epilog %result = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs) %cast = bitcast i16 %result to half @@ -2599,7 +2599,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3] ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo @@ -2714,7 +2714,7 @@ ; ; GFX10-LABEL: uaddsat_i64_sv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, s0, v0 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, s0, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1] ; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc_lo @@ -2758,7 +2758,7 @@ ; ; GFX10-LABEL: uaddsat_i64_vs: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, s0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[0:1] ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo @@ -2823,9 +2823,9 @@ ; GFX10-NEXT: v_mov_b32_e32 v11, v5 ; GFX10-NEXT: v_mov_b32_e32 v15, v6 ; GFX10-NEXT: v_mov_b32_e32 v16, v7 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, v10 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v10 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v11, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v5, vcc_lo, v2, v15 +; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v2, v15 ; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v3, v16, vcc_lo ; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[10:11] ; GFX10-NEXT: v_cmp_lt_u64_e64 s4, v[5:6], v[15:16] @@ -3203,7 +3203,7 @@ ; ; GFX10-LABEL: uaddsat_i128_sv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v10, vcc_lo, s0, v0 +; GFX10-NEXT: v_add_co_u32 v10, vcc_lo, s0, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, s2, v2, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s3, v3, vcc_lo @@ -3297,7 +3297,7 @@ ; ; GFX10-LABEL: uaddsat_i128_vs: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, s0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, s0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, s2, v2, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, s3, v3, vcc_lo @@ -3440,7 +3440,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v16, v10 ; GFX10-NEXT: v_mov_b32_e32 v17, v11 ; GFX10-NEXT: v_mov_b32_e32 v10, v12 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, v18 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v18 ; GFX10-NEXT: v_mov_b32_e32 v11, v13 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v19, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v20, v14 @@ -3449,7 +3449,7 @@ ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v17, vcc_lo ; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[18:19] ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v4, vcc_lo, v4, v10 +; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v4, v10 ; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v5, v11, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v6, v20, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v7, v21, vcc_lo diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll @@ -37,10 +37,10 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 9, v0 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, 9, v1 -; GFX10-NEXT: v_sub_nc_u16_e64 v0, v0, v1 clamp -; GFX10-NEXT: v_lshrrev_b16_e64 v0, 9, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 9, v0 +; GFX10-NEXT: v_lshlrev_b16 v1, 9, v1 +; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1 clamp +; GFX10-NEXT: v_lshrrev_b16 v0, 9, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i7 @llvm.usub.sat.i7(i7 %lhs, i7 %rhs) ret i7 %result @@ -83,8 +83,8 @@ ; GFX10-NEXT: s_bfe_u32 s2, 9, 0x100000 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2 ; GFX10-NEXT: s_lshl_b32 s1, s1, s2 -; GFX10-NEXT: v_sub_nc_u16_e64 v0, s0, s1 clamp -; GFX10-NEXT: v_lshrrev_b16_e64 v0, 9, v0 +; GFX10-NEXT: v_sub_nc_u16 v0, s0, s1 clamp +; GFX10-NEXT: v_lshrrev_b16 v0, 9, v0 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog %result = call i7 @llvm.usub.sat.i7(i7 %lhs, i7 %rhs) @@ -124,10 +124,10 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 8, v0 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, 8, v1 -; GFX10-NEXT: v_sub_nc_u16_e64 v0, v0, v1 clamp -; GFX10-NEXT: v_lshrrev_b16_e64 v0, 8, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 8, v0 +; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 +; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1 clamp +; GFX10-NEXT: v_lshrrev_b16 v0, 8, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.usub.sat.i8(i8 %lhs, i8 %rhs) ret i8 %result @@ -170,8 +170,8 @@ ; GFX10-NEXT: s_bfe_u32 s2, 8, 0x100000 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2 ; GFX10-NEXT: s_lshl_b32 s1, s1, s2 -; GFX10-NEXT: v_sub_nc_u16_e64 v0, s0, s1 clamp -; GFX10-NEXT: v_lshrrev_b16_e64 v0, 8, v0 +; GFX10-NEXT: v_sub_nc_u16 v0, s0, s1 clamp +; GFX10-NEXT: v_lshrrev_b16 v0, 8, v0 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog %result = call i8 @llvm.usub.sat.i8(i8 %lhs, i8 %rhs) @@ -1561,7 +1561,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_sub_nc_u16_e64 v0, v0, v1 clamp +; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs) ret i16 %result @@ -1593,7 +1593,7 @@ ; ; GFX10-LABEL: s_usubsat_i16: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_sub_nc_u16_e64 v0, s0, s1 clamp +; GFX10-NEXT: v_sub_nc_u16 v0, s0, s1 clamp ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog %result = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs) @@ -1622,7 +1622,7 @@ ; ; GFX10-LABEL: usubsat_i16_sv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_sub_nc_u16_e64 v0, s0, v0 clamp +; GFX10-NEXT: v_sub_nc_u16 v0, s0, v0 clamp ; GFX10-NEXT: ; return to shader part epilog %result = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs) %cast = bitcast i16 %result to half @@ -1651,7 +1651,7 @@ ; ; GFX10-LABEL: usubsat_i16_vs: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_sub_nc_u16_e64 v0, v0, s0 clamp +; GFX10-NEXT: v_sub_nc_u16 v0, v0, s0 clamp ; GFX10-NEXT: ; return to shader part epilog %result = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs) %cast = bitcast i16 %result to half @@ -2469,7 +2469,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_sub_co_u32_e64 v4, vcc_lo, v0, v2 +; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3] ; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc_lo @@ -2584,7 +2584,7 @@ ; ; GFX10-LABEL: usubsat_i64_sv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_sub_co_u32_e64 v2, vcc_lo, s0, v0 +; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, s0, v0 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[0:1], v[0:1] ; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo @@ -2628,7 +2628,7 @@ ; ; GFX10-LABEL: usubsat_i64_vs: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_sub_co_u32_e64 v2, vcc_lo, v0, s0 +; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v0, s0 ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[0:1] ; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo @@ -2693,10 +2693,10 @@ ; GFX10-NEXT: v_mov_b32_e32 v11, v1 ; GFX10-NEXT: v_mov_b32_e32 v0, v2 ; GFX10-NEXT: v_mov_b32_e32 v1, v3 -; GFX10-NEXT: v_sub_co_u32_e64 v8, vcc_lo, v10, v4 +; GFX10-NEXT: v_sub_co_u32 v8, vcc_lo, v10, v4 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v9, vcc_lo, v11, v5, vcc_lo ; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[10:11], v[4:5] -; GFX10-NEXT: v_sub_co_u32_e64 v4, s4, v0, v6 +; GFX10-NEXT: v_sub_co_u32 v4, s4, v0, v6 ; GFX10-NEXT: v_sub_co_ci_u32_e64 v5, s4, v1, v7, s4 ; GFX10-NEXT: v_cmp_lt_u64_e64 s4, v[0:1], v[6:7] ; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc_lo @@ -3079,7 +3079,7 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[2:3] ; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc_lo -; GFX10-NEXT: v_sub_co_u32_e64 v0, vcc_lo, s0, v0 +; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, s0, v0 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_and_b32_e32 v4, 1, v4 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v2, vcc_lo, s2, v2, vcc_lo @@ -3173,7 +3173,7 @@ ; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[2:3] ; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc_lo -; GFX10-NEXT: v_sub_co_u32_e64 v0, vcc_lo, v0, s0 +; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, s0 ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo ; GFX10-NEXT: v_and_b32_e32 v4, 1, v4 ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v2, vcc_lo, s2, v2, vcc_lo @@ -3326,13 +3326,13 @@ ; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[24:25], v[14:15] ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, v16 ; GFX10-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc_lo -; GFX10-NEXT: v_sub_co_u32_e64 v0, vcc_lo, v22, v8 +; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v22, v8 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v23, v9, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v8, v18, v17, s5 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v2, vcc_lo, v20, v10, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, 0, s4 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v21, v11, vcc_lo -; GFX10-NEXT: v_sub_co_u32_e64 v4, vcc_lo, v26, v12 +; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v26, v12 ; GFX10-NEXT: v_and_b32_e32 v8, 1, v8 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v27, v13, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0, s4 diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll @@ -81,8 +81,8 @@ ; GFX1064-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX1064-NEXT: s_mov_b64 s[6:7], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s7, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s7, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB0_2 @@ -117,7 +117,7 @@ ; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX1032-NEXT: s_mov_b32 s5, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s5, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s5, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB0_2 @@ -265,8 +265,8 @@ ; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x34 ; GFX1064-NEXT: s_mov_b64 s[8:9], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s8, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s9, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s9, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz BB1_2 @@ -304,7 +304,7 @@ ; GFX1032-NEXT: s_load_dword s2, s[0:1], 0x34 ; GFX1032-NEXT: s_mov_b32 s3, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s3, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB1_2 @@ -494,13 +494,13 @@ ; GFX1064-NEXT: v_readlane_b32 s7, v1, 31 ; GFX1064-NEXT: v_writelane_b32 v3, s6, 16 ; GFX1064-NEXT: s_mov_b64 exec, s[4:5] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX1064-NEXT: v_readlane_b32 s8, v1, 47 ; GFX1064-NEXT: v_readlane_b32 s9, v1, 63 ; GFX1064-NEXT: v_writelane_b32 v3, s7, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[4:5] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX1064-NEXT: s_mov_b32 s4, s9 ; GFX1064-NEXT: v_writelane_b32 v3, s8, 48 @@ -556,7 +556,7 @@ ; GFX1032-NEXT: v_readlane_b32 s6, v1, 31 ; GFX1032-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; GFX1032-NEXT: s_mov_b32 exec_lo, s4 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: s_or_saveexec_b32 s4, -1 ; GFX1032-NEXT: v_writelane_b32 v3, s5, 16 ; GFX1032-NEXT: s_mov_b32 exec_lo, s4 @@ -680,8 +680,8 @@ ; GFX1064-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX1064-NEXT: s_mov_b64 s[6:7], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s7, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s7, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB3_2 @@ -718,7 +718,7 @@ ; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX1032-NEXT: s_mov_b32 s5, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s5, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s5, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB3_2 @@ -899,8 +899,8 @@ ; GFX1064-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; GFX1064-NEXT: s_mov_b64 s[8:9], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s8, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s9, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s9, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz BB4_2 @@ -935,7 +935,7 @@ ; GFX1064-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s6, -1 ; GFX1064-NEXT: v_add_nc_u32_e32 v1, v4, v3 -; GFX1064-NEXT: v_add_co_u32_e64 v0, vcc, s0, v0 +; GFX1064-NEXT: v_add_co_u32 v0, vcc, s0, v0 ; GFX1064-NEXT: v_add_co_ci_u32_e32 v1, vcc, s1, v1, vcc ; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX1064-NEXT: s_endpgm @@ -947,7 +947,7 @@ ; GFX1032-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; GFX1032-NEXT: s_mov_b32 s8, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s8, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB4_2 @@ -982,7 +982,7 @@ ; GFX1032-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s6, -1 ; GFX1032-NEXT: v_add_nc_u32_e32 v1, v4, v3 -; GFX1032-NEXT: v_add_co_u32_e64 v0, vcc_lo, s0, v0 +; GFX1032-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 ; GFX1032-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo ; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX1032-NEXT: s_endpgm @@ -1170,8 +1170,8 @@ ; GFX1064-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX1064-NEXT: s_mov_b64 s[6:7], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s7, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s7, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB6_2 @@ -1207,7 +1207,7 @@ ; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX1032-NEXT: s_mov_b32 s5, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s5, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s5, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB6_2 @@ -1356,8 +1356,8 @@ ; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x34 ; GFX1064-NEXT: s_mov_b64 s[8:9], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s8, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s9, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s9, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz BB7_2 @@ -1395,7 +1395,7 @@ ; GFX1032-NEXT: s_load_dword s2, s[0:1], 0x34 ; GFX1032-NEXT: s_mov_b32 s3, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s3, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB7_2 @@ -1585,13 +1585,13 @@ ; GFX1064-NEXT: v_readlane_b32 s7, v1, 31 ; GFX1064-NEXT: v_writelane_b32 v3, s6, 16 ; GFX1064-NEXT: s_mov_b64 exec, s[4:5] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX1064-NEXT: v_readlane_b32 s8, v1, 47 ; GFX1064-NEXT: v_readlane_b32 s9, v1, 63 ; GFX1064-NEXT: v_writelane_b32 v3, s7, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[4:5] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX1064-NEXT: s_mov_b32 s4, s9 ; GFX1064-NEXT: v_writelane_b32 v3, s8, 48 @@ -1647,7 +1647,7 @@ ; GFX1032-NEXT: v_readlane_b32 s6, v1, 31 ; GFX1032-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; GFX1032-NEXT: s_mov_b32 exec_lo, s4 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: s_or_saveexec_b32 s4, -1 ; GFX1032-NEXT: v_writelane_b32 v3, s5, 16 ; GFX1032-NEXT: s_mov_b32 exec_lo, s4 @@ -1811,8 +1811,8 @@ ; GFX1064-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX1064-NEXT: s_mov_b64 s[6:7], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s7, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s7, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB9_2 @@ -1840,7 +1840,7 @@ ; GFX1064-NEXT: v_mul_u32_u24_e32 v1, 5, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v2 ; GFX1064-NEXT: v_mul_hi_u32_u24_e32 v2, 5, v0 -; GFX1064-NEXT: v_sub_co_u32_e64 v0, vcc, s2, v1 +; GFX1064-NEXT: v_sub_co_u32 v0, vcc, s2, v1 ; GFX1064-NEXT: s_mov_b32 s2, -1 ; GFX1064-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s3, v2, vcc ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 @@ -1852,7 +1852,7 @@ ; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX1032-NEXT: s_mov_b32 s5, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s5, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s5, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB9_2 @@ -1880,7 +1880,7 @@ ; GFX1032-NEXT: v_mul_u32_u24_e32 v1, 5, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v2 ; GFX1032-NEXT: v_mul_hi_u32_u24_e32 v2, 5, v0 -; GFX1032-NEXT: v_sub_co_u32_e64 v0, vcc_lo, s2, v1 +; GFX1032-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v1 ; GFX1032-NEXT: s_mov_b32 s2, -1 ; GFX1032-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s3, v2, vcc_lo ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 @@ -2036,8 +2036,8 @@ ; GFX1064-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; GFX1064-NEXT: s_mov_b64 s[8:9], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s8, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s9, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s9, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz BB10_2 @@ -2072,7 +2072,7 @@ ; GFX1064-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s6, -1 ; GFX1064-NEXT: v_add_nc_u32_e32 v1, v4, v3 -; GFX1064-NEXT: v_sub_co_u32_e64 v0, vcc, s0, v0 +; GFX1064-NEXT: v_sub_co_u32 v0, vcc, s0, v0 ; GFX1064-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s1, v1, vcc ; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX1064-NEXT: s_endpgm @@ -2084,7 +2084,7 @@ ; GFX1032-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; GFX1032-NEXT: s_mov_b32 s8, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s8, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB10_2 @@ -2119,7 +2119,7 @@ ; GFX1032-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s6, -1 ; GFX1032-NEXT: v_add_nc_u32_e32 v1, v4, v3 -; GFX1032-NEXT: v_sub_co_u32_e64 v0, vcc_lo, s0, v0 +; GFX1032-NEXT: v_sub_co_u32 v0, vcc_lo, s0, v0 ; GFX1032-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo ; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX1032-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -108,8 +108,8 @@ ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX1064-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s2, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s3, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB0_2 @@ -139,7 +139,7 @@ ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX1032-NEXT: s_mov_b32 s3, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s3, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB0_2 @@ -274,8 +274,8 @@ ; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x2c ; GFX1064-NEXT: s_mov_b64 s[6:7], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s7, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s7, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz BB1_2 @@ -309,7 +309,7 @@ ; GFX1032-NEXT: s_load_dword s2, s[0:1], 0x2c ; GFX1032-NEXT: s_mov_b32 s3, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s3, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB1_2 @@ -484,13 +484,13 @@ ; GFX1064-NEXT: v_readlane_b32 s5, v1, 31 ; GFX1064-NEXT: v_writelane_b32 v3, s4, 16 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1 ; GFX1064-NEXT: v_readlane_b32 s7, v1, 63 ; GFX1064-NEXT: v_readlane_b32 s6, v1, 47 ; GFX1064-NEXT: v_writelane_b32 v3, s5, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX1064-NEXT: v_writelane_b32 v3, s6, 48 ; GFX1064-NEXT: s_mov_b64 exec, s[4:5] @@ -541,7 +541,7 @@ ; GFX1032-NEXT: v_readlane_b32 s4, v1, 31 ; GFX1032-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: s_or_saveexec_b32 s2, -1 ; GFX1032-NEXT: v_writelane_b32 v3, s3, 16 ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 @@ -671,12 +671,12 @@ ; GFX1064-NEXT: v_permlanex16_b32 v2, v2, -1, -1 ; GFX1064-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX1064-NEXT: s_mov_b64 exec, s[0:1] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX1064-NEXT: v_readlane_b32 s2, v1, 0 ; GFX1064-NEXT: v_readlane_b32 s3, v1, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[0:1] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_add_i32 s0, s2, s3 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -707,7 +707,7 @@ ; GFX1032-NEXT: v_permlanex16_b32 v2, v2, -1, -1 ; GFX1032-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX1032-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-NEXT: v_mov_b32_e32 v0, v1 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo @@ -837,8 +837,8 @@ ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX1064-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s4, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s5, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX1064-NEXT: s_cbranch_execz BB4_2 @@ -870,7 +870,7 @@ ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX1032-NEXT: s_mov_b32 s3, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s3, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB4_2 @@ -1038,8 +1038,8 @@ ; GFX1064-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX1064-NEXT: s_mov_b64 s[6:7], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s7, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s7, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB5_2 @@ -1069,7 +1069,7 @@ ; GFX1064-NEXT: v_readfirstlane_b32 s4, v2 ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: v_add_nc_u32_e32 v1, v4, v3 -; GFX1064-NEXT: v_add_co_u32_e64 v0, vcc, s2, v0 +; GFX1064-NEXT: v_add_co_u32 v0, vcc, s2, v0 ; GFX1064-NEXT: s_mov_b32 s2, -1 ; GFX1064-NEXT: v_add_co_ci_u32_e32 v1, vcc, s4, v1, vcc ; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -1080,7 +1080,7 @@ ; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX1032-NEXT: s_mov_b32 s5, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s5, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s5, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB5_2 @@ -1110,7 +1110,7 @@ ; GFX1032-NEXT: v_readfirstlane_b32 s4, v2 ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: v_add_nc_u32_e32 v1, v4, v3 -; GFX1032-NEXT: v_add_co_u32_e64 v0, vcc_lo, s2, v0 +; GFX1032-NEXT: v_add_co_u32 v0, vcc_lo, s2, v0 ; GFX1032-NEXT: s_mov_b32 s2, -1 ; GFX1032-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s4, v1, vcc_lo ; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -1286,8 +1286,8 @@ ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX1064-NEXT: s_mov_b64 s[2:3], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s2, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s3, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s2, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s3, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB7_2 @@ -1318,7 +1318,7 @@ ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX1032-NEXT: s_mov_b32 s3, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s3, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB7_2 @@ -1454,8 +1454,8 @@ ; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x2c ; GFX1064-NEXT: s_mov_b64 s[6:7], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s7, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s7, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz BB8_2 @@ -1489,7 +1489,7 @@ ; GFX1032-NEXT: s_load_dword s2, s[0:1], 0x2c ; GFX1032-NEXT: s_mov_b32 s3, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s3, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB8_2 @@ -1664,13 +1664,13 @@ ; GFX1064-NEXT: v_readlane_b32 s5, v1, 31 ; GFX1064-NEXT: v_writelane_b32 v3, s4, 16 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1 ; GFX1064-NEXT: v_readlane_b32 s7, v1, 63 ; GFX1064-NEXT: v_readlane_b32 s6, v1, 47 ; GFX1064-NEXT: v_writelane_b32 v3, s5, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX1064-NEXT: v_writelane_b32 v3, s6, 48 ; GFX1064-NEXT: s_mov_b64 exec, s[4:5] @@ -1721,7 +1721,7 @@ ; GFX1032-NEXT: v_readlane_b32 s4, v1, 31 ; GFX1032-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: s_or_saveexec_b32 s2, -1 ; GFX1032-NEXT: v_writelane_b32 v3, s3, 16 ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 @@ -1851,12 +1851,12 @@ ; GFX1064-NEXT: v_permlanex16_b32 v2, v2, -1, -1 ; GFX1064-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX1064-NEXT: s_mov_b64 exec, s[0:1] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[0:1], -1 ; GFX1064-NEXT: v_readlane_b32 s2, v1, 0 ; GFX1064-NEXT: v_readlane_b32 s3, v1, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[0:1] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_add_i32 s0, s2, s3 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -1887,7 +1887,7 @@ ; GFX1032-NEXT: v_permlanex16_b32 v2, v2, -1, -1 ; GFX1032-NEXT: v_add_nc_u32_e32 v1, v1, v2 ; GFX1032-NEXT: s_mov_b32 exec_lo, s0 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 ; GFX1032-NEXT: v_mov_b32_e32 v0, v1 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo @@ -2019,8 +2019,8 @@ ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX1064-NEXT: s_mov_b64 s[4:5], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s4, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s5, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s4, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX1064-NEXT: s_cbranch_execz BB11_2 @@ -2042,7 +2042,7 @@ ; GFX1064-NEXT: v_mul_u32_u24_e32 v1, 5, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v2 ; GFX1064-NEXT: v_mul_hi_u32_u24_e32 v2, 5, v0 -; GFX1064-NEXT: v_sub_co_u32_e64 v0, vcc, s2, v1 +; GFX1064-NEXT: v_sub_co_u32 v0, vcc, s2, v1 ; GFX1064-NEXT: s_mov_b32 s2, -1 ; GFX1064-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s3, v2, vcc ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 @@ -2055,7 +2055,7 @@ ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX1032-NEXT: s_mov_b32 s3, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s3, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB11_2 @@ -2077,7 +2077,7 @@ ; GFX1032-NEXT: v_mul_u32_u24_e32 v1, 5, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v2 ; GFX1032-NEXT: v_mul_hi_u32_u24_e32 v2, 5, v0 -; GFX1032-NEXT: v_sub_co_u32_e64 v0, vcc_lo, s2, v1 +; GFX1032-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v1 ; GFX1032-NEXT: s_mov_b32 s2, -1 ; GFX1032-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s3, v2, vcc_lo ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 @@ -2226,8 +2226,8 @@ ; GFX1064-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX1064-NEXT: s_mov_b64 s[6:7], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s7, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s7, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX1064-NEXT: s_cbranch_execz BB12_2 @@ -2257,7 +2257,7 @@ ; GFX1064-NEXT: v_readfirstlane_b32 s4, v2 ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: v_add_nc_u32_e32 v1, v4, v3 -; GFX1064-NEXT: v_sub_co_u32_e64 v0, vcc, s2, v0 +; GFX1064-NEXT: v_sub_co_u32 v0, vcc, s2, v0 ; GFX1064-NEXT: s_mov_b32 s2, -1 ; GFX1064-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s4, v1, vcc ; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -2268,7 +2268,7 @@ ; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX1032-NEXT: s_mov_b32 s5, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s5, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s5, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB12_2 @@ -2298,7 +2298,7 @@ ; GFX1032-NEXT: v_readfirstlane_b32 s4, v2 ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: v_add_nc_u32_e32 v1, v4, v3 -; GFX1032-NEXT: v_sub_co_u32_e64 v0, vcc_lo, s2, v0 +; GFX1032-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v0 ; GFX1032-NEXT: s_mov_b32 s2, -1 ; GFX1032-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s4, v1, vcc_lo ; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -2518,13 +2518,13 @@ ; GFX1064-NEXT: v_readlane_b32 s5, v1, 31 ; GFX1064-NEXT: v_writelane_b32 v3, s4, 16 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1 ; GFX1064-NEXT: v_readlane_b32 s7, v1, 63 ; GFX1064-NEXT: v_readlane_b32 s6, v1, 47 ; GFX1064-NEXT: v_writelane_b32 v3, s5, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX1064-NEXT: v_writelane_b32 v3, s6, 48 ; GFX1064-NEXT: s_mov_b64 exec, s[4:5] @@ -2575,7 +2575,7 @@ ; GFX1032-NEXT: v_readlane_b32 s4, v1, 31 ; GFX1032-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: s_or_saveexec_b32 s2, -1 ; GFX1032-NEXT: v_writelane_b32 v3, s3, 16 ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 @@ -2752,13 +2752,13 @@ ; GFX1064-NEXT: v_readlane_b32 s5, v1, 31 ; GFX1064-NEXT: v_writelane_b32 v3, s4, 16 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1 ; GFX1064-NEXT: v_readlane_b32 s7, v1, 63 ; GFX1064-NEXT: v_readlane_b32 s6, v1, 47 ; GFX1064-NEXT: v_writelane_b32 v3, s5, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX1064-NEXT: v_writelane_b32 v3, s6, 48 ; GFX1064-NEXT: s_mov_b64 exec, s[4:5] @@ -2809,7 +2809,7 @@ ; GFX1032-NEXT: v_readlane_b32 s4, v1, 31 ; GFX1032-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: s_or_saveexec_b32 s2, -1 ; GFX1032-NEXT: v_writelane_b32 v3, s3, 16 ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 @@ -2986,13 +2986,13 @@ ; GFX1064-NEXT: v_readlane_b32 s5, v1, 31 ; GFX1064-NEXT: v_writelane_b32 v3, s4, 16 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1 ; GFX1064-NEXT: v_readlane_b32 s7, v1, 63 ; GFX1064-NEXT: v_readlane_b32 s6, v1, 47 ; GFX1064-NEXT: v_writelane_b32 v3, s5, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX1064-NEXT: v_writelane_b32 v3, s6, 48 ; GFX1064-NEXT: s_mov_b64 exec, s[4:5] @@ -3043,7 +3043,7 @@ ; GFX1032-NEXT: v_readlane_b32 s4, v1, 31 ; GFX1032-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: s_or_saveexec_b32 s2, -1 ; GFX1032-NEXT: v_writelane_b32 v3, s3, 16 ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 @@ -3222,13 +3222,13 @@ ; GFX1064-NEXT: v_readlane_b32 s5, v2, 31 ; GFX1064-NEXT: v_writelane_b32 v1, s4, 16 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1 ; GFX1064-NEXT: v_readlane_b32 s7, v2, 63 ; GFX1064-NEXT: v_readlane_b32 s6, v2, 47 ; GFX1064-NEXT: v_writelane_b32 v1, s5, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX1064-NEXT: v_writelane_b32 v1, s6, 48 ; GFX1064-NEXT: s_mov_b64 exec, s[4:5] @@ -3281,7 +3281,7 @@ ; GFX1032-NEXT: v_readlane_b32 s4, v2, 31 ; GFX1032-NEXT: v_mov_b32_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: s_or_saveexec_b32 s2, -1 ; GFX1032-NEXT: v_writelane_b32 v1, s3, 16 ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 @@ -3425,8 +3425,8 @@ ; GFX1064-LABEL: max_i64_constant: ; GFX1064: ; %bb.0: ; %entry ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -3459,7 +3459,7 @@ ; GFX1032-LABEL: max_i64_constant: ; GFX1032: ; %bb.0: ; %entry ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo @@ -3639,13 +3639,13 @@ ; GFX1064-NEXT: v_readlane_b32 s5, v2, 31 ; GFX1064-NEXT: v_writelane_b32 v1, s4, 16 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1 ; GFX1064-NEXT: v_readlane_b32 s7, v2, 63 ; GFX1064-NEXT: v_readlane_b32 s6, v2, 47 ; GFX1064-NEXT: v_writelane_b32 v1, s5, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX1064-NEXT: v_writelane_b32 v1, s6, 48 ; GFX1064-NEXT: s_mov_b64 exec, s[4:5] @@ -3698,7 +3698,7 @@ ; GFX1032-NEXT: v_readlane_b32 s4, v2, 31 ; GFX1032-NEXT: v_mov_b32_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: s_or_saveexec_b32 s2, -1 ; GFX1032-NEXT: v_writelane_b32 v1, s3, 16 ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 @@ -3842,8 +3842,8 @@ ; GFX1064-LABEL: min_i64_constant: ; GFX1064: ; %bb.0: ; %entry ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -3876,7 +3876,7 @@ ; GFX1032-LABEL: min_i64_constant: ; GFX1032: ; %bb.0: ; %entry ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo @@ -4054,13 +4054,13 @@ ; GFX1064-NEXT: v_readlane_b32 s5, v1, 31 ; GFX1064-NEXT: v_writelane_b32 v3, s4, 16 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1 ; GFX1064-NEXT: v_readlane_b32 s7, v1, 63 ; GFX1064-NEXT: v_readlane_b32 s6, v1, 47 ; GFX1064-NEXT: v_writelane_b32 v3, s5, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX1064-NEXT: v_writelane_b32 v3, s6, 48 ; GFX1064-NEXT: s_mov_b64 exec, s[4:5] @@ -4111,7 +4111,7 @@ ; GFX1032-NEXT: v_readlane_b32 s4, v1, 31 ; GFX1032-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: s_or_saveexec_b32 s2, -1 ; GFX1032-NEXT: v_writelane_b32 v3, s3, 16 ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 @@ -4252,8 +4252,8 @@ ; GFX1064-LABEL: umax_i64_constant: ; GFX1064: ; %bb.0: ; %entry ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -4286,7 +4286,7 @@ ; GFX1032-LABEL: umax_i64_constant: ; GFX1032: ; %bb.0: ; %entry ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo @@ -4464,13 +4464,13 @@ ; GFX1064-NEXT: v_readlane_b32 s5, v1, 31 ; GFX1064-NEXT: v_writelane_b32 v3, s4, 16 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[2:3], -1 ; GFX1064-NEXT: v_readlane_b32 s7, v1, 63 ; GFX1064-NEXT: v_readlane_b32 s6, v1, 47 ; GFX1064-NEXT: v_writelane_b32 v3, s5, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[2:3] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX1064-NEXT: v_writelane_b32 v3, s6, 48 ; GFX1064-NEXT: s_mov_b64 exec, s[4:5] @@ -4521,7 +4521,7 @@ ; GFX1032-NEXT: v_readlane_b32 s4, v1, 31 ; GFX1032-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: s_or_saveexec_b32 s2, -1 ; GFX1032-NEXT: v_writelane_b32 v3, s3, 16 ; GFX1032-NEXT: s_mov_b32 exec_lo, s2 @@ -4662,8 +4662,8 @@ ; GFX1064-LABEL: umin_i64_constant: ; GFX1064: ; %bb.0: ; %entry ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -4696,7 +4696,7 @@ ; GFX1032-LABEL: umin_i64_constant: ; GFX1032: ; %bb.0: ; %entry ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll @@ -89,8 +89,8 @@ ; GFX1064-NEXT: ; %bb.1: ; GFX1064-NEXT: s_mov_b64 s[12:13], exec ; GFX1064-NEXT: ; implicit-def: $vgpr1 -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s12, 0 -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s13, v0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, s12, 0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s13, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX1064-NEXT: s_and_saveexec_b64 s[28:29], vcc ; GFX1064-NEXT: s_cbranch_execz BB0_3 @@ -124,7 +124,7 @@ ; GFX1032-NEXT: ; %bb.1: ; GFX1032-NEXT: s_mov_b32 s10, exec_lo ; GFX1032-NEXT: ; implicit-def: $vgpr1 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s10, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s10, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s9, vcc_lo ; GFX1032-NEXT: s_cbranch_execz BB0_3 @@ -316,13 +316,13 @@ ; GFX1064-NEXT: v_readlane_b32 s13, v1, 31 ; GFX1064-NEXT: v_writelane_b32 v3, s12, 16 ; GFX1064-NEXT: s_mov_b64 exec, s[10:11] -; GFX1064-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1064-NEXT: s_or_saveexec_b64 s[10:11], -1 ; GFX1064-NEXT: v_readlane_b32 s12, v1, 63 ; GFX1064-NEXT: v_readlane_b32 s14, v1, 47 ; GFX1064-NEXT: v_writelane_b32 v3, s13, 32 ; GFX1064-NEXT: s_mov_b64 exec, s[10:11] -; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, exec_hi, v0 +; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0 ; GFX1064-NEXT: s_or_saveexec_b64 s[10:11], -1 ; GFX1064-NEXT: v_writelane_b32 v3, s14, 48 ; GFX1064-NEXT: s_mov_b64 exec, s[10:11] @@ -375,7 +375,7 @@ ; GFX1032-NEXT: v_mov_b32_dpp v3, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; GFX1032-NEXT: v_readlane_b32 s10, v1, 15 ; GFX1032-NEXT: s_mov_b32 exec_lo, s9 -; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0 +; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1032-NEXT: s_or_saveexec_b32 s9, -1 ; GFX1032-NEXT: v_writelane_b32 v3, s10, 16 ; GFX1032-NEXT: s_mov_b32 exec_lo, s9 diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll --- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll +++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll @@ -51,7 +51,7 @@ ; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; -; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}} +; GFX1010: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}} ; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] define amdgpu_kernel void @vadd64rr(i64 addrspace(1)* %out, i64 %a) { entry: @@ -81,7 +81,7 @@ ; GFX9: v_mov_b32_e32 v1, 0x1234 ; GFX9: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; -; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}} +; GFX1010: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}} ; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0, 0x1234, [[CARRY]] define amdgpu_kernel void @vadd64ri(i64 addrspace(1)* %out) { entry: @@ -125,7 +125,7 @@ ; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; -; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}} +; GFX1010: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}} ; GFX1010: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]] define amdgpu_kernel void @uaddo32_vcc_user(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 { %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) @@ -170,7 +170,7 @@ ; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0 ; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; -; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0 +; GFX1010: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0 ; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] define amdgpu_kernel void @vuaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -236,7 +236,7 @@ ; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; -; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}} +; GFX1010: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}} ; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] define amdgpu_kernel void @vsub64rr(i64 addrspace(1)* %out, i64 %a) { entry: @@ -266,7 +266,7 @@ ; GFX9: v_mov_b32_e32 v1, 0x1234 ; GFX9: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc ; -; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}} +; GFX1010: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}} ; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0x1234, 0, [[CARRY]] define amdgpu_kernel void @vsub64ri(i64 addrspace(1)* %out) { entry: @@ -310,7 +310,7 @@ ; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; -; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}} +; GFX1010: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}} ; GFX1010: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]] define amdgpu_kernel void @usubo32_vcc_user(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 { %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) @@ -355,7 +355,7 @@ ; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0 ; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; -; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0 +; GFX1010: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0 ; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] define amdgpu_kernel void @vusubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll @@ -26,7 +26,7 @@ ; GCN-LABEL: test_sink_small_offset_global_atomic_csub_i32: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GCN-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 +; GCN-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 ; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_and_saveexec_b32 s4, vcc_lo diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -695,7 +695,7 @@ ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc_lo, v0, 2 +; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, 2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_load_ushort v2, v[2:3] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll --- a/llvm/test/CodeGen/AMDGPU/ctlz.ll +++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll @@ -452,7 +452,7 @@ ; GFX10-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v1 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc_lo ; GFX10-NEXT: v_add_nc_u32_e32 v1, -16, v1 -; GFX10-NEXT: v_add_nc_u16_e64 v1, v1, -8 +; GFX10-NEXT: v_add_nc_u16 v1, v1, -8 ; GFX10-NEXT: global_store_byte v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm %val = load i8, i8 addrspace(1)* %valptr diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -998,14 +998,14 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX10-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff00, v0 -; GFX10-NEXT: v_add_nc_u16_e64 v4, v0, 9 -; GFX10-NEXT: v_add_nc_u16_e64 v2, v2, 9 +; GFX10-NEXT: v_add_nc_u16 v4, v0, 9 +; GFX10-NEXT: v_add_nc_u16 v2, v2, 9 ; GFX10-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: v_or_b32_sdwa v2, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: v_mov_b32_e32 v4, 0 ; GFX10-NEXT: v_cvt_f32_ubyte3_e32 v3, v0 -; GFX10-NEXT: v_add_nc_u16_e64 v1, v1, s0 -; GFX10-NEXT: v_add_nc_u16_e64 v5, v2, s0 +; GFX10-NEXT: v_add_nc_u16 v1, v1, s0 +; GFX10-NEXT: v_add_nc_u16 v5, v2, s0 ; GFX10-NEXT: v_cvt_f32_ubyte2_e32 v2, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v1 ; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v1, v0 diff --git a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll --- a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll +++ b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll @@ -45,7 +45,7 @@ ; GFX10-NEXT: s_mov_b64 s[4:5], 0 ; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_add_co_u32_e64 v0, s5, s4, s4 +; GFX10-NEXT: v_add_co_u32 v0, s5, s4, s4 ; GFX10-NEXT: s_cmpk_lg_u32 s5, 0x0 ; GFX10-NEXT: s_addc_u32 s5, s4, 0 ; GFX10-NEXT: s_cselect_b32 s6, 1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll --- a/llvm/test/CodeGen/AMDGPU/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/fshr.ll @@ -671,10 +671,10 @@ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 ; GFX10-NEXT: v_and_b32_e32 v2, 15, v2 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX10-NEXT: v_and_b32_e32 v3, 15, v3 -; GFX10-NEXT: v_lshrrev_b16_e64 v1, v2, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v3, v0 +; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %ret = call i16 @llvm.fshr.i16(i16 %src0, i16 %src1, i16 %src2) @@ -846,27 +846,27 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v4 ; GFX10-NEXT: v_xor_b32_e32 v8, -1, v4 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v0 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX10-NEXT: v_and_b32_e32 v4, 15, v4 ; GFX10-NEXT: v_and_b32_e32 v9, 15, v6 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v6 ; GFX10-NEXT: v_and_b32_e32 v15, 15, v8 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v2 -; GFX10-NEXT: v_lshrrev_b16_e64 v2, v4, v2 -; GFX10-NEXT: v_lshlrev_b16_e64 v10, 1, v10 +; GFX10-NEXT: v_lshrrev_b16 v2, v4, v2 +; GFX10-NEXT: v_lshlrev_b16 v10, 1, v10 ; GFX10-NEXT: v_and_b32_e32 v19, 15, v6 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v15, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, v15, v0 ; GFX10-NEXT: v_xor_b32_e32 v11, -1, v5 -; GFX10-NEXT: v_lshrrev_b16_e64 v4, v9, v7 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, 1, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v6, v19, v10 +; GFX10-NEXT: v_lshrrev_b16 v4, v9, v7 +; GFX10-NEXT: v_lshlrev_b16 v1, 1, v1 +; GFX10-NEXT: v_lshlrev_b16 v6, v19, v10 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX10-NEXT: v_and_b32_e32 v7, 15, v11 ; GFX10-NEXT: v_and_b32_e32 v2, 15, v5 ; GFX10-NEXT: v_or_b32_e32 v11, v6, v4 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, v7, v1 -; GFX10-NEXT: v_lshrrev_b16_e64 v2, v2, v3 +; GFX10-NEXT: v_lshlrev_b16 v1, v7, v1 +; GFX10-NEXT: v_lshrrev_b16 v2, v2, v3 ; GFX10-NEXT: v_lshl_or_b32 v0, v11, 16, v0 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -996,15 +996,15 @@ ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v0 ; GFX10-NEXT: v_xor_b32_e32 v9, -1, v6 ; GFX10-NEXT: v_and_b32_e32 v6, 15, v6 -; GFX10-NEXT: v_lshlrev_b16_e64 v8, 1, v8 +; GFX10-NEXT: v_lshlrev_b16 v8, 1, v8 ; GFX10-NEXT: v_and_b32_e32 v13, 15, v10 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, 1, v1 +; GFX10-NEXT: v_lshlrev_b16 v1, 1, v1 ; GFX10-NEXT: v_and_b32_e32 v9, 15, v9 -; GFX10-NEXT: v_lshrrev_b16_e64 v6, v6, v7 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 1, v0 +; GFX10-NEXT: v_lshrrev_b16 v6, v6, v7 +; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v12, 16, v2 -; GFX10-NEXT: v_lshlrev_b16_e64 v11, 1, v11 -; GFX10-NEXT: v_lshlrev_b16_e64 v7, v9, v8 +; GFX10-NEXT: v_lshlrev_b16 v11, 1, v11 +; GFX10-NEXT: v_lshlrev_b16 v7, v9, v8 ; GFX10-NEXT: v_xor_b32_e32 v9, -1, v10 ; GFX10-NEXT: v_xor_b32_e32 v10, -1, v5 ; GFX10-NEXT: v_xor_b32_e32 v8, -1, v4 @@ -1013,12 +1013,12 @@ ; GFX10-NEXT: v_and_b32_e32 v9, 15, v9 ; GFX10-NEXT: v_and_b32_e32 v10, 15, v10 ; GFX10-NEXT: v_and_b32_e32 v15, 15, v8 -; GFX10-NEXT: v_lshrrev_b16_e64 v2, v4, v2 -; GFX10-NEXT: v_lshrrev_b16_e64 v3, v5, v3 -; GFX10-NEXT: v_lshrrev_b16_e64 v4, v13, v12 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, v10, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, v15, v0 -; GFX10-NEXT: v_lshlrev_b16_e64 v5, v9, v11 +; GFX10-NEXT: v_lshrrev_b16 v2, v4, v2 +; GFX10-NEXT: v_lshrrev_b16 v3, v5, v3 +; GFX10-NEXT: v_lshrrev_b16 v4, v13, v12 +; GFX10-NEXT: v_lshlrev_b16 v1, v10, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, v15, v0 +; GFX10-NEXT: v_lshlrev_b16 v5, v9, v11 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-vop-literal.ll b/llvm/test/CodeGen/AMDGPU/gfx10-vop-literal.ll --- a/llvm/test/CodeGen/AMDGPU/gfx10-vop-literal.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx10-vop-literal.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}test_add_lit: -; GFX10: v_add_co_u32_e64 v{{[0-9]+}}, vcc_lo, 0x80992bff, v{{[0-9]+}} +; GFX10: v_add_co_u32 v{{[0-9]+}}, vcc_lo, 0x80992bff, v{{[0-9]+}} ; GFX10: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, 0xe7, v{{[0-9]+}}, vcc_lo ; GFX9: v_mov_b32_e32 [[C2:v[0-9]+]], 0xe7 ; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0x80992bff, v{{[0-9]+}} diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll --- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll @@ -120,9 +120,9 @@ ; ; GFX10-LABEL: global_xchg_saddr_i32_rtn_2048: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], s2, v0 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s[0:1], s3, 0, s[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v2, vcc, 0x800, v0 +; GFX10-NEXT: v_add_co_u32 v2, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc, 0, v3, vcc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll --- a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll @@ -85,7 +85,7 @@ ; ; GFX10-LABEL: global_load_saddr_i8_offset_neg4096: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], 0xfffff000, s2 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -111,7 +111,7 @@ ; ; GFX10-LABEL: global_load_saddr_i8_offset_neg4097: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], 0xfffff000, s2 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -137,7 +137,7 @@ ; ; GFX10-LABEL: global_load_saddr_i8_offset_neg4098: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], 0xfffff000, s2 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -241,7 +241,7 @@ ; ; GFX10-LABEL: global_load_saddr_i8_offset_neg2049: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], 0xfffff800, s2 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff800, s2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -264,7 +264,7 @@ ; ; GFX10-LABEL: global_load_saddr_i8_offset_neg2050: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], 0xfffff800, s2 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff800, s2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -309,7 +309,7 @@ ; ; GFX10-LABEL: global_load_saddr_i8_offset_4294967296: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], 0, s2 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0, s2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -333,7 +333,7 @@ ; ; GFX10-LABEL: global_load_saddr_i8_offset_4294967297: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], 0, s2 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0, s2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -357,7 +357,7 @@ ; ; GFX10-LABEL: global_load_saddr_i8_offset_4294971391: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], 0x800, s2 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0x800, s2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -382,7 +382,7 @@ ; ; GFX10-LABEL: global_load_saddr_i8_offset_4294971392: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], 0x1000, s2 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0x1000, s2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -407,7 +407,7 @@ ; ; GFX10-LABEL: global_load_saddr_i8_offset_neg4294967295: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], 0x800, s2 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0x800, s2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -431,7 +431,7 @@ ; ; GFX10-LABEL: global_load_saddr_i8_offset_neg4294967296: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], 0, s2 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0, s2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -455,7 +455,7 @@ ; ; GFX10-LABEL: global_load_saddr_i8_offset_neg4294967297: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], 0, s2 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0, s2 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -496,9 +496,9 @@ ; ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], s2, v0 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc, 0x800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -527,9 +527,9 @@ ; ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], s2, v0 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc, 0x1000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc, 0x1000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -553,9 +553,9 @@ ; ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], s2, v0 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc, 0xfffff000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc, 0xfffff000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, -1, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -584,9 +584,9 @@ ; ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], s2, v0 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc, 0xfffff000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc, 0xfffff000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, -1, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -626,9 +626,9 @@ ; ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], s2, v0 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc, 0x800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -668,9 +668,9 @@ ; ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], s2, v0 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc, 0xfffff800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc, 0xfffff800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, -1, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -694,9 +694,9 @@ ; ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, s[0:1], s2, v0 +; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc, 0x800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -935,7 +935,7 @@ ; ; GFX10-LABEL: global_load_i8_vgpr64_sgpr32: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc, v0, s2 +; GFX10-NEXT: v_add_co_u32 v0, vcc, v0, s2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -960,9 +960,9 @@ ; ; GFX10-LABEL: global_load_i8_vgpr64_sgpr32_offset_4095: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc, v0, s2 +; GFX10-NEXT: v_add_co_u32 v0, vcc, v0, s2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc, 0x800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1001,7 +1001,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc, s2, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc, s2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, s3, v1, vcc ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1087,7 +1087,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc, s2, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc, s2, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, s3, v1, vcc ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll --- a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll +++ b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll @@ -454,12 +454,12 @@ ; GFX10-NEXT: BB4_1: ; %bb3 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_and_b32_e32 v2, s1, v4 -; GFX10-NEXT: v_add_nc_u16_e64 v4, v4, 1 +; GFX10-NEXT: v_add_nc_u16 v4, v4, 1 ; GFX10-NEXT: v_cvt_f32_u32_e32 v7, v2 ; GFX10-NEXT: v_lshlrev_b64 v[5:6], 1, v[2:3] ; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x400, v4 ; GFX10-NEXT: v_mul_f32_e32 v2, v7, v1 -; GFX10-NEXT: v_add_co_u32_e64 v5, s0, s2, v5 +; GFX10-NEXT: v_add_co_u32 v5, s0, s2, v5 ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s0, s3, v6, s0 ; GFX10-NEXT: v_trunc_f32_e32 v2, v2 @@ -540,11 +540,11 @@ ; GFX10-NEXT: BB5_1: ; %bb3 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_and_b32_e32 v2, s1, v4 -; GFX10-NEXT: v_add_nc_u16_e64 v4, v4, 1 +; GFX10-NEXT: v_add_nc_u16 v4, v4, 1 ; GFX10-NEXT: v_cvt_f32_u32_e32 v7, v2 ; GFX10-NEXT: v_lshlrev_b64 v[5:6], 1, v[2:3] ; GFX10-NEXT: v_mul_f32_e32 v8, v7, v1 -; GFX10-NEXT: v_add_co_u32_e64 v5, s0, s2, v5 +; GFX10-NEXT: v_add_co_u32 v5, s0, s2, v5 ; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s0, s3, v6, s0 ; GFX10-NEXT: v_trunc_f32_e32 v10, v8 ; GFX10-NEXT: v_mad_f32 v7, -v10, v0, v7 @@ -630,14 +630,14 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_bfe_i32 v5, v4, 0, 16 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v4 -; GFX10-NEXT: v_add_nc_u16_e64 v4, v4, 1 +; GFX10-NEXT: v_add_nc_u16 v4, v4, 1 ; GFX10-NEXT: v_cvt_f32_i32_e32 v7, v5 ; GFX10-NEXT: v_xor_b32_e32 v8, s4, v5 ; GFX10-NEXT: v_lshlrev_b64 v[5:6], 1, v[2:3] ; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x400, v4 ; GFX10-NEXT: v_mul_f32_e32 v2, v7, v1 ; GFX10-NEXT: v_ashrrev_i32_e32 v8, 30, v8 -; GFX10-NEXT: v_add_co_u32_e64 v5, s0, s2, v5 +; GFX10-NEXT: v_add_co_u32 v5, s0, s2, v5 ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo ; GFX10-NEXT: v_trunc_f32_e32 v2, v2 ; GFX10-NEXT: v_or_b32_e32 v8, 1, v8 @@ -724,7 +724,7 @@ ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_bfe_i32 v7, v4, 0, 16 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v4 -; GFX10-NEXT: v_add_nc_u16_e64 v4, v4, 1 +; GFX10-NEXT: v_add_nc_u16 v4, v4, 1 ; GFX10-NEXT: v_cvt_f32_i32_e32 v11, v7 ; GFX10-NEXT: v_xor_b32_e32 v6, s1, v7 ; GFX10-NEXT: v_mul_f32_e32 v8, v11, v1 @@ -738,7 +738,7 @@ ; GFX10-NEXT: v_lshlrev_b64 v[5:6], 1, v[2:3] ; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x400, v4 ; GFX10-NEXT: v_add_nc_u32_e32 v2, v8, v9 -; GFX10-NEXT: v_add_co_u32_e64 v5, s0, s2, v5 +; GFX10-NEXT: v_add_co_u32 v5, s0, s2, v5 ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s0, s3, v6, s0 ; GFX10-NEXT: v_mul_lo_u32 v2, v2, s1 diff --git a/llvm/test/CodeGen/AMDGPU/idot2.ll b/llvm/test/CodeGen/AMDGPU/idot2.ll --- a/llvm/test/CodeGen/AMDGPU/idot2.ll +++ b/llvm/test/CodeGen/AMDGPU/idot2.ll @@ -2875,9 +2875,9 @@ ; GFX10-DL-NEXT: global_load_ushort v2, v0, s[6:7] ; GFX10-DL-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-DL-NEXT: s_waitcnt vmcnt(1) -; GFX10-DL-NEXT: v_lshrrev_b16_e64 v0, 8, v1 +; GFX10-DL-NEXT: v_lshrrev_b16 v0, 8, v1 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) -; GFX10-DL-NEXT: v_lshrrev_b16_e64 v3, 8, v2 +; GFX10-DL-NEXT: v_lshrrev_b16 v3, 8, v2 ; GFX10-DL-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX10-DL-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-DL-NEXT: v_mul_i32_i24_sdwa v0, sext(v3), sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 diff --git a/llvm/test/CodeGen/AMDGPU/idot4s.ll b/llvm/test/CodeGen/AMDGPU/idot4s.ll --- a/llvm/test/CodeGen/AMDGPU/idot4s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4s.ll @@ -905,9 +905,9 @@ ; GFX10-DL-NEXT: global_load_dword v2, v0, s[6:7] ; GFX10-DL-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-DL-NEXT: s_waitcnt vmcnt(1) -; GFX10-DL-NEXT: v_lshrrev_b16_e64 v0, 8, v1 +; GFX10-DL-NEXT: v_lshrrev_b16 v0, 8, v1 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) -; GFX10-DL-NEXT: v_lshrrev_b16_e64 v3, 8, v2 +; GFX10-DL-NEXT: v_lshrrev_b16 v3, 8, v2 ; GFX10-DL-NEXT: v_mul_i32_i24_sdwa v7, sext(v1), sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX10-DL-NEXT: v_mul_i32_i24_sdwa v0, sext(v0), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX10-DL-NEXT: v_mul_i32_i24_sdwa v3, sext(v1), sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_2 @@ -1120,30 +1120,30 @@ ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-DL-NEXT: global_load_ushort v3, v0, s[0:1] ; GFX10-DL-NEXT: s_waitcnt vmcnt(2) -; GFX10-DL-NEXT: v_ashrrev_i16_e64 v5, 8, v1 +; GFX10-DL-NEXT: v_ashrrev_i16 v5, 8, v1 ; GFX10-DL-NEXT: v_and_b32_sdwa v8, v4, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-DL-NEXT: s_waitcnt vmcnt(1) -; GFX10-DL-NEXT: v_ashrrev_i16_e64 v6, 8, v2 +; GFX10-DL-NEXT: v_ashrrev_i16 v6, 8, v2 ; GFX10-DL-NEXT: v_and_b32_sdwa v7, v4, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v2, 16, v2 ; GFX10-DL-NEXT: v_lshl_or_b32 v5, v5, 16, v8 ; GFX10-DL-NEXT: v_lshl_or_b32 v6, v6, 16, v7 -; GFX10-DL-NEXT: v_ashrrev_i16_e64 v7, 8, v1 +; GFX10-DL-NEXT: v_ashrrev_i16 v7, 8, v1 ; GFX10-DL-NEXT: v_and_b32_sdwa v1, v4, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX10-DL-NEXT: v_ashrrev_i16_e64 v8, 8, v2 +; GFX10-DL-NEXT: v_ashrrev_i16 v8, 8, v2 ; GFX10-DL-NEXT: v_and_b32_sdwa v2, v4, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-DL-NEXT: v_pk_mul_lo_u16 v4, v5, v6 ; GFX10-DL-NEXT: v_lshl_or_b32 v1, v7, 16, v1 ; GFX10-DL-NEXT: v_lshl_or_b32 v2, v8, 16, v2 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v5, 16, v4 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) -; GFX10-DL-NEXT: v_add_nc_u16_e64 v3, v4, v3 +; GFX10-DL-NEXT: v_add_nc_u16 v3, v4, v3 ; GFX10-DL-NEXT: v_pk_mul_lo_u16 v1, v1, v2 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v2, v3, v5 +; GFX10-DL-NEXT: v_add_nc_u16 v2, v3, v5 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v1, v2, v1 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v1, v1, v3 +; GFX10-DL-NEXT: v_add_nc_u16 v1, v2, v1 +; GFX10-DL-NEXT: v_add_nc_u16 v1, v1, v3 ; GFX10-DL-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, diff --git a/llvm/test/CodeGen/AMDGPU/idot4u.ll b/llvm/test/CodeGen/AMDGPU/idot4u.ll --- a/llvm/test/CodeGen/AMDGPU/idot4u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4u.ll @@ -2020,10 +2020,10 @@ ; GFX10-DL-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-DL-NEXT: global_load_ushort v3, v0, s[0:1] ; GFX10-DL-NEXT: s_waitcnt vmcnt(2) -; GFX10-DL-NEXT: v_lshrrev_b16_e64 v5, 8, v1 +; GFX10-DL-NEXT: v_lshrrev_b16 v5, 8, v1 ; GFX10-DL-NEXT: v_and_b32_sdwa v8, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-DL-NEXT: s_waitcnt vmcnt(1) -; GFX10-DL-NEXT: v_lshrrev_b16_e64 v6, 8, v2 +; GFX10-DL-NEXT: v_lshrrev_b16 v6, 8, v2 ; GFX10-DL-NEXT: v_and_b32_sdwa v7, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-DL-NEXT: v_and_b32_sdwa v9, v1, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-DL-NEXT: v_and_b32_sdwa v10, v2, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD @@ -2038,12 +2038,12 @@ ; GFX10-DL-NEXT: v_lshl_or_b32 v1, v1, 16, v4 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v4, 16, v5 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) -; GFX10-DL-NEXT: v_add_nc_u16_e64 v3, v5, v3 +; GFX10-DL-NEXT: v_add_nc_u16 v3, v5, v3 ; GFX10-DL-NEXT: v_pk_mul_lo_u16 v1, v1, v2 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v2, v3, v4 +; GFX10-DL-NEXT: v_add_nc_u16 v2, v3, v4 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v1, v2, v1 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v1, v1, v3 +; GFX10-DL-NEXT: v_add_nc_u16 v1, v2, v1 +; GFX10-DL-NEXT: v_add_nc_u16 v1, v1, v3 ; GFX10-DL-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, @@ -2225,23 +2225,23 @@ ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v5, 24, v2 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v6, 16, v1 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v7, 16, v2 -; GFX10-DL-NEXT: v_lshrrev_b16_e64 v8, 8, v2 -; GFX10-DL-NEXT: v_mul_lo_u16_e64 v4, v4, v5 -; GFX10-DL-NEXT: v_lshrrev_b16_e64 v5, 8, v1 -; GFX10-DL-NEXT: v_mul_lo_u16_e64 v9, v6, v7 +; GFX10-DL-NEXT: v_lshrrev_b16 v8, 8, v2 +; GFX10-DL-NEXT: v_mul_lo_u16 v4, v4, v5 +; GFX10-DL-NEXT: v_lshrrev_b16 v5, 8, v1 +; GFX10-DL-NEXT: v_mul_lo_u16 v9, v6, v7 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) ; GFX10-DL-NEXT: v_mad_u16 v3, v1, v2, v3 -; GFX10-DL-NEXT: v_lshlrev_b16_e64 v4, 8, v4 -; GFX10-DL-NEXT: v_mul_lo_u16_e64 v5, v5, v8 +; GFX10-DL-NEXT: v_lshlrev_b16 v4, 8, v4 +; GFX10-DL-NEXT: v_mul_lo_u16 v5, v5, v8 ; GFX10-DL-NEXT: v_or_b32_sdwa v4, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX10-DL-NEXT: v_lshlrev_b16_e64 v5, 8, v5 +; GFX10-DL-NEXT: v_lshlrev_b16 v5, 8, v5 ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v8, 16, v4 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v2, 8, v4 ; GFX10-DL-NEXT: v_or_b32_sdwa v5, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v5, 8, v5 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v1, v3, v5 +; GFX10-DL-NEXT: v_add_nc_u16 v1, v3, v5 ; GFX10-DL-NEXT: v_mad_u16 v1, v6, v7, v1 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v1, v1, v2 +; GFX10-DL-NEXT: v_add_nc_u16 v1, v1, v2 ; GFX10-DL-NEXT: global_store_byte v0, v1, s[0:1] ; GFX10-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, diff --git a/llvm/test/CodeGen/AMDGPU/idot8s.ll b/llvm/test/CodeGen/AMDGPU/idot8s.ll --- a/llvm/test/CodeGen/AMDGPU/idot8s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8s.ll @@ -646,51 +646,51 @@ ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v10, 4, v1 ; GFX10-DL-XNACK-NEXT: s_waitcnt vmcnt(1) ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v16, 4, v2 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v17, 12, v2 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v1, 12, v1 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v17, 12, v2 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v1, 12, v1 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v11, 28, v2 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v10, 12, v10 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v16, 12, v16 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v10, 12, v10 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v16, 12, v16 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v12, 24, v2 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v13, 20, v2 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v14, 16, v2 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v15, 12, v2 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v2, 8, v2 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v18, 12, v1 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v17, 12, v17 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v9, 12, v9 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v10, 12, v10 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v2, 12, v2 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v16, 12, v16 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v18, 12, v1 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v17, 12, v17 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v9, 12, v9 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v10, 12, v10 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v2, 12, v2 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v16, 12, v16 ; GFX10-DL-XNACK-NEXT: s_waitcnt vmcnt(0) ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v18, v17, v3 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v8, 12, v8 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v3, 12, v9 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v9, 12, v15 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v2, 12, v2 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v8, 12, v8 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v3, 12, v9 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v9, 12, v15 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v2, 12, v2 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v10, v16, v1 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v7, 12, v7 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v10, 12, v14 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v8, 12, v8 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v15, 12, v9 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v7, 12, v7 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v10, 12, v14 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v8, 12, v8 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v15, 12, v9 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v3, v2, v1 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v6, 12, v6 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v2, 12, v7 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v7, 12, v13 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v3, 12, v10 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v6, 12, v6 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v2, 12, v7 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v7, 12, v13 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v3, 12, v10 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v8, v15, v1 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v5, 12, v5 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v8, 12, v12 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v6, 12, v6 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v7, 12, v7 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v5, 12, v5 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v8, 12, v12 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v6, 12, v6 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v7, 12, v7 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v2, v3, v1 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v4, 12, v4 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v2, 12, v5 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v3, 12, v8 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v5, 12, v11 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v4, 12, v4 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v2, 12, v5 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v3, 12, v8 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v5, 12, v11 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v6, v7, v1 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v4, 12, v4 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v7, 12, v5 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v4, 12, v4 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v7, 12, v5 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v2, v3, v1 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v4, v7, v1 ; GFX10-DL-XNACK-NEXT: global_store_short v0, v1, s[0:1] @@ -724,51 +724,51 @@ ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v10, 4, v1 ; GFX10-DL-NOXNACK-NEXT: s_waitcnt vmcnt(1) ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v16, 4, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v17, 12, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v1, 12, v1 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v17, 12, v0 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v1, 12, v1 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v11, 28, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v10, 12, v10 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v16, 12, v16 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v10, 12, v10 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v16, 12, v16 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v12, 24, v0 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v13, 20, v0 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v14, 16, v0 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v15, 12, v0 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v0, 8, v0 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v18, 12, v1 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v17, 12, v17 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v9, 12, v9 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v10, 12, v10 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v0, 12, v0 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v16, 12, v16 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v18, 12, v1 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v17, 12, v17 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v9, 12, v9 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v10, 12, v10 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v0, 12, v0 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v16, 12, v16 ; GFX10-DL-NOXNACK-NEXT: s_waitcnt vmcnt(0) ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v1, v18, v17, v3 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v3, 12, v9 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v9, 12, v15 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v8, 12, v8 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v0, 12, v0 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v3, 12, v9 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v9, 12, v15 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v8, 12, v8 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v0, 12, v0 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v1, v10, v16, v1 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v7, 12, v7 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v10, 12, v14 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v15, 12, v8 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v9, 12, v9 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v7, 12, v7 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v10, 12, v14 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v15, 12, v8 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v9, 12, v9 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v3, v0, v1 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v6, 12, v6 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v1, 12, v7 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v7, 12, v13 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v3, 12, v10 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v6, 12, v6 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v1, 12, v7 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v7, 12, v13 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v3, 12, v10 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v15, v9, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v5, 12, v5 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v8, 12, v12 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v6, 12, v6 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v7, 12, v7 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v5, 12, v5 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v8, 12, v12 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v6, 12, v6 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v7, 12, v7 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v1, v3, v0 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v1, 12, v5 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v5, 12, v11 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v3, 12, v8 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v4, 12, v4 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v1, 12, v5 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v5, 12, v11 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v3, 12, v8 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v4, 12, v4 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v6, v7, v0 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v5, 12, v5 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v7, 12, v4 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v5, 12, v5 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v7, 12, v4 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v1, v3, v0 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v7, v5, v0 ; GFX10-DL-NOXNACK-NEXT: global_store_short v2, v0, s[0:1] @@ -1220,51 +1220,51 @@ ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v10, 4, v1 ; GFX10-DL-XNACK-NEXT: s_waitcnt vmcnt(1) ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v16, 4, v2 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v17, 12, v2 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v1, 12, v1 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v17, 12, v2 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v1, 12, v1 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v11, 28, v2 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v10, 12, v10 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v16, 12, v16 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v10, 12, v10 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v16, 12, v16 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v12, 24, v2 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v13, 20, v2 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v14, 16, v2 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v15, 12, v2 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v2, 8, v2 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v18, 12, v1 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v17, 12, v17 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v9, 12, v9 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v10, 12, v10 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v2, 12, v2 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v16, 12, v16 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v18, 12, v1 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v17, 12, v17 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v9, 12, v9 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v10, 12, v10 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v2, 12, v2 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v16, 12, v16 ; GFX10-DL-XNACK-NEXT: s_waitcnt vmcnt(0) ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v18, v17, v3 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v8, 12, v8 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v3, 12, v9 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v9, 12, v15 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v2, 12, v2 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v8, 12, v8 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v3, 12, v9 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v9, 12, v15 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v2, 12, v2 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v10, v16, v1 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v7, 12, v7 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v10, 12, v14 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v8, 12, v8 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v15, 12, v9 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v7, 12, v7 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v10, 12, v14 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v8, 12, v8 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v15, 12, v9 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v3, v2, v1 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v6, 12, v6 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v2, 12, v7 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v7, 12, v13 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v3, 12, v10 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v6, 12, v6 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v2, 12, v7 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v7, 12, v13 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v3, 12, v10 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v8, v15, v1 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v5, 12, v5 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v8, 12, v12 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v6, 12, v6 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v7, 12, v7 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v5, 12, v5 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v8, 12, v12 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v6, 12, v6 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v7, 12, v7 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v2, v3, v1 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v4, 12, v4 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v2, 12, v5 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v3, 12, v8 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v5, 12, v11 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v4, 12, v4 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v2, 12, v5 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v3, 12, v8 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v5, 12, v11 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v6, v7, v1 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v4, 12, v4 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v7, 12, v5 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v4, 12, v4 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v7, 12, v5 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v2, v3, v1 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v1, v4, v7, v1 ; GFX10-DL-XNACK-NEXT: global_store_byte v0, v1, s[0:1] @@ -1298,51 +1298,51 @@ ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v10, 4, v1 ; GFX10-DL-NOXNACK-NEXT: s_waitcnt vmcnt(1) ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v16, 4, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v17, 12, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v1, 12, v1 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v17, 12, v0 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v1, 12, v1 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v11, 28, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v10, 12, v10 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v16, 12, v16 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v10, 12, v10 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v16, 12, v16 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v12, 24, v0 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v13, 20, v0 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v14, 16, v0 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v15, 12, v0 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v0, 8, v0 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v18, 12, v1 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v17, 12, v17 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v9, 12, v9 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v10, 12, v10 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v0, 12, v0 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v16, 12, v16 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v18, 12, v1 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v17, 12, v17 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v9, 12, v9 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v10, 12, v10 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v0, 12, v0 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v16, 12, v16 ; GFX10-DL-NOXNACK-NEXT: s_waitcnt vmcnt(0) ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v1, v18, v17, v3 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v3, 12, v9 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v9, 12, v15 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v8, 12, v8 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v0, 12, v0 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v3, 12, v9 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v9, 12, v15 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v8, 12, v8 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v0, 12, v0 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v1, v10, v16, v1 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v7, 12, v7 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v10, 12, v14 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v15, 12, v8 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v9, 12, v9 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v7, 12, v7 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v10, 12, v14 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v15, 12, v8 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v9, 12, v9 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v3, v0, v1 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v6, 12, v6 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v1, 12, v7 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v7, 12, v13 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v3, 12, v10 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v6, 12, v6 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v1, 12, v7 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v7, 12, v13 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v3, 12, v10 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v15, v9, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v5, 12, v5 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v8, 12, v12 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v6, 12, v6 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v7, 12, v7 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v5, 12, v5 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v8, 12, v12 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v6, 12, v6 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v7, 12, v7 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v1, v3, v0 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v1, 12, v5 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v5, 12, v11 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v3, 12, v8 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v4, 12, v4 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v1, 12, v5 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v5, 12, v11 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v3, 12, v8 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v4, 12, v4 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v6, v7, v0 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v5, 12, v5 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v7, 12, v4 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v5, 12, v5 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v7, 12, v4 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v1, v3, v0 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v7, v5, v0 ; GFX10-DL-NOXNACK-NEXT: global_store_byte v2, v0, s[0:1] @@ -2574,30 +2574,30 @@ ; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v10, 16, v1 ; GFX10-DL-XNACK-NEXT: s_waitcnt vmcnt(0) -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v1, v1, v3 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v1, v1, v3 ; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v3, v4, v5 ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v5, 12, v8 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v4, v4, v19 ; GFX10-DL-XNACK-NEXT: v_pk_mul_lo_u16 v2, v9, v2 -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v1, v1, v10 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v1, v1, v10 ; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v3, v6, 16, v3 ; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v5, 12, v5 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v4, v14, 16, v4 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v1, v1, v2 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v1, v1, v2 ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v2, 12, v3 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v3, 12, v4 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_mul_lo_u16 v4, v7, v5 -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v1, v1, v6 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v1, v1, v6 ; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v3, 12, v3 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v5, 16, v4 -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v7, v1, v4 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v7, v1, v4 ; GFX10-DL-XNACK-NEXT: v_pk_mul_lo_u16 v2, v2, v3 -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v1, v7, v5 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v1, v7, v5 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v1, v1, v2 -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v1, v1, v3 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v1, v1, v2 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v1, v1, v3 ; GFX10-DL-XNACK-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-DL-XNACK-NEXT: s_endpgm ; @@ -2662,30 +2662,30 @@ ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v0, 12, v0 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v10, 16, v1 ; GFX10-DL-NOXNACK-NEXT: s_waitcnt vmcnt(0) -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v1, v1, v3 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v1, v1, v3 ; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v3, v4, v5 ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v5, 12, v8 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v4, v4, v19 ; GFX10-DL-NOXNACK-NEXT: v_pk_mul_lo_u16 v0, v9, v0 -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v1, v1, v10 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v1, v1, v10 ; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v3, v6, 16, v3 ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v5, 12, v5 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v4, v14, 16, v4 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v0, v1, v0 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v0, v1, v0 ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v1, 12, v3 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v3, 12, v4 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_mul_lo_u16 v4, v7, v5 -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v7, v0, v6 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v7, v0, v6 ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v3, 12, v3 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v5, 16, v4 -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v0, v7, v4 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v0, v7, v4 ; GFX10-DL-NOXNACK-NEXT: v_pk_mul_lo_u16 v1, v1, v3 -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v0, v0, v5 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v0, v0, v5 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v0, v0, v1 -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v0, v0, v3 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v0, v0, v1 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v0, v0, v3 ; GFX10-DL-NOXNACK-NEXT: global_store_short v2, v0, s[0:1] ; GFX10-DL-NOXNACK-NEXT: s_endpgm ; GFX10-DL-LABEL: idot8_acc16_vecMul: @@ -3215,80 +3215,80 @@ ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v9, 8, v1 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v16, 8, v2 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v6, 28, v1 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v8, 12, v8 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v15, 12, v15 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v8, 12, v8 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v15, 12, v15 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v13, 28, v2 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v10, 4, v1 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v17, 4, v2 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v9, 12, v9 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v16, 12, v16 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v8, 12, v8 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v15, 12, v15 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v9, 12, v9 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v16, 12, v16 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v8, 12, v8 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v15, 12, v15 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v0, 20, v1 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v7, 24, v1 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v11, 20, v2 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v14, 24, v2 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v6, 12, v6 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v13, 12, v13 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v10, 12, v10 -; GFX10-DL-XNACK-NEXT: v_mul_lo_u16_e64 v8, v8, v15 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v17, 12, v17 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v9, 12, v9 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v16, 12, v16 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v6, 12, v6 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v13, 12, v13 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v10, 12, v10 +; GFX10-DL-XNACK-NEXT: v_mul_lo_u16 v8, v8, v15 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v17, 12, v17 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v9, 12, v9 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v16, 12, v16 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v5, 16, v1 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v12, 16, v2 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v7, 12, v7 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v0, 12, v0 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v14, 12, v14 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v11, 12, v11 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v6, 12, v6 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v13, 12, v13 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v1, 12, v1 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v2, 12, v2 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v10, 12, v10 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v15, 12, v17 -; GFX10-DL-XNACK-NEXT: v_mul_lo_u16_e64 v9, v9, v16 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v8, 8, v8 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v5, 12, v5 -; GFX10-DL-XNACK-NEXT: v_mul_lo_u16_e64 v6, v6, v13 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v12, 12, v12 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v7, 12, v7 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v0, 12, v0 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v14, 12, v14 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v11, 12, v11 -; GFX10-DL-XNACK-NEXT: v_mul_lo_u16_e64 v10, v10, v15 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v7, 12, v7 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v0, 12, v0 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v14, 12, v14 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v11, 12, v11 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v6, 12, v6 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v13, 12, v13 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v1, 12, v1 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v2, 12, v2 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v10, 12, v10 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v15, 12, v17 +; GFX10-DL-XNACK-NEXT: v_mul_lo_u16 v9, v9, v16 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v8, 8, v8 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v5, 12, v5 +; GFX10-DL-XNACK-NEXT: v_mul_lo_u16 v6, v6, v13 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v12, 12, v12 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v7, 12, v7 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v0, 12, v0 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v14, 12, v14 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v11, 12, v11 +; GFX10-DL-XNACK-NEXT: v_mul_lo_u16 v10, v10, v15 ; GFX10-DL-XNACK-NEXT: v_or_b32_sdwa v8, v9, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v1, 12, v1 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v2, 12, v2 -; GFX10-DL-XNACK-NEXT: v_mul_lo_u16_e64 v9, v0, v11 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v5, 12, v5 -; GFX10-DL-XNACK-NEXT: v_ashrrev_i16_e64 v23, 12, v12 -; GFX10-DL-XNACK-NEXT: v_mul_lo_u16_e64 v11, v7, v14 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v6, 8, v6 -; GFX10-DL-XNACK-NEXT: v_mul_lo_u16_e64 v1, v1, v2 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v10, 8, v10 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v1, 12, v1 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v2, 12, v2 +; GFX10-DL-XNACK-NEXT: v_mul_lo_u16 v9, v0, v11 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v5, 12, v5 +; GFX10-DL-XNACK-NEXT: v_ashrrev_i16 v23, 12, v12 +; GFX10-DL-XNACK-NEXT: v_mul_lo_u16 v11, v7, v14 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v6, 8, v6 +; GFX10-DL-XNACK-NEXT: v_mul_lo_u16 v1, v1, v2 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v10, 8, v10 ; GFX10-DL-XNACK-NEXT: v_lshlrev_b32_e32 v0, 16, v8 -; GFX10-DL-XNACK-NEXT: v_mul_lo_u16_e64 v2, v5, v23 -; GFX10-DL-XNACK-NEXT: v_lshlrev_b16_e64 v9, 8, v9 +; GFX10-DL-XNACK-NEXT: v_mul_lo_u16 v2, v5, v23 +; GFX10-DL-XNACK-NEXT: v_lshlrev_b16 v9, 8, v9 ; GFX10-DL-XNACK-NEXT: v_or_b32_sdwa v6, v11, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-DL-XNACK-NEXT: v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-DL-XNACK-NEXT: v_or_b32_sdwa v11, v10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-DL-XNACK-NEXT: v_or_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-DL-XNACK-NEXT: v_lshlrev_b32_e32 v9, 16, v6 ; GFX10-DL-XNACK-NEXT: s_waitcnt vmcnt(0) -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v3, v1, v3 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v3, v1, v3 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v10, 8, v11 ; GFX10-DL-XNACK-NEXT: v_or_b32_sdwa v1, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v9, v3, v10 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v9, v3, v10 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b64 v[2:3], 24, v[0:1] ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v0, v9, v8 -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v0, v0, v2 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v0, v9, v8 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v0, v0, v2 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v0, v5, v23, v0 -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v0, v0, v1 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v0, v0, v1 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v1, 8, v6 ; GFX10-DL-XNACK-NEXT: v_mad_u16 v0, v7, v14, v0 -; GFX10-DL-XNACK-NEXT: v_add_nc_u16_e64 v0, v0, v1 +; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v0, v0, v1 ; GFX10-DL-XNACK-NEXT: global_store_byte v19, v0, s[0:1] ; GFX10-DL-XNACK-NEXT: s_endpgm ; @@ -3319,77 +3319,77 @@ ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v13, 28, v0 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v14, 24, v0 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v17, 4, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v8, 12, v8 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v15, 12, v15 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v8, 12, v8 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v15, 12, v15 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v16, 8, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v0, 12, v0 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v0, 12, v0 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v6, 28, v1 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v10, 4, v1 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v9, 12, v9 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v8, 12, v8 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v18, 12, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v0, 12, v16 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v15, 12, v15 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v9, 12, v9 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v8, 12, v8 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v18, 12, v0 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v0, 12, v16 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v15, 12, v15 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v3, 20, v1 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v6, 12, v6 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v13, 12, v13 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v10, 12, v10 -; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16_e64 v8, v8, v15 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v17, 12, v17 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v9, 12, v9 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v0, 12, v0 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v6, 12, v6 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v13, 12, v13 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v10, 12, v10 +; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16 v8, v8, v15 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v17, 12, v17 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v9, 12, v9 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v0, 12, v0 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v5, 16, v1 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v7, 12, v7 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v3, 12, v3 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v14, 12, v14 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v11, 12, v11 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v6, 12, v6 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v13, 12, v13 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v1, 12, v1 -; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16_e64 v23, v9, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v8, 8, v8 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v10, 12, v10 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v15, 12, v17 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v5, 12, v5 -; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16_e64 v6, v6, v13 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v12, 12, v12 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v9, 12, v11 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v3, 12, v3 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v7, 12, v7 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v14, 12, v14 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v1, 12, v1 -; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16_e64 v10, v10, v15 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v7, 12, v7 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v3, 12, v3 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v14, 12, v14 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v11, 12, v11 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v6, 12, v6 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v13, 12, v13 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v1, 12, v1 +; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16 v23, v9, v0 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v8, 8, v8 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v10, 12, v10 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v15, 12, v17 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v5, 12, v5 +; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16 v6, v6, v13 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v12, 12, v12 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v9, 12, v11 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v3, 12, v3 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v7, 12, v7 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v14, 12, v14 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v1, 12, v1 +; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16 v10, v10, v15 ; GFX10-DL-NOXNACK-NEXT: v_or_b32_sdwa v8, v23, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16_e64 v3, v3, v9 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v5, 12, v5 -; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16_e64 v11, 12, v12 -; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16_e64 v9, v7, v14 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v6, 8, v6 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v10, 8, v10 -; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16_e64 v1, v1, v18 +; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16 v3, v3, v9 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v5, 12, v5 +; GFX10-DL-NOXNACK-NEXT: v_ashrrev_i16 v11, 12, v12 +; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16 v9, v7, v14 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v6, 8, v6 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v10, 8, v10 +; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16 v1, v1, v18 ; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b32_e32 v0, 16, v8 -; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16_e64 v12, v5, v11 -; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16_e64 v3, 8, v3 +; GFX10-DL-NOXNACK-NEXT: v_mul_lo_u16 v12, v5, v11 +; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b16 v3, 8, v3 ; GFX10-DL-NOXNACK-NEXT: v_or_b32_sdwa v6, v9, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-DL-NOXNACK-NEXT: v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-DL-NOXNACK-NEXT: v_or_b32_sdwa v9, v10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-DL-NOXNACK-NEXT: v_or_b32_sdwa v3, v12, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-DL-NOXNACK-NEXT: v_lshlrev_b32_e32 v10, 16, v6 ; GFX10-DL-NOXNACK-NEXT: s_waitcnt vmcnt(0) -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v2, v1, v2 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v2, v1, v2 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v9, 8, v9 ; GFX10-DL-NOXNACK-NEXT: v_or_b32_sdwa v1, v3, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v9, v2, v9 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v9, v2, v9 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b64 v[2:3], 24, v[0:1] ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v0, v9, v8 -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v0, v0, v2 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v0, v9, v8 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v0, v0, v2 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v5, v11, v0 -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v0, v0, v1 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v0, v0, v1 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v1, 8, v6 ; GFX10-DL-NOXNACK-NEXT: v_mad_u16 v0, v7, v14, v0 -; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16_e64 v0, v0, v1 +; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v0, v0, v1 ; GFX10-DL-NOXNACK-NEXT: global_store_byte v19, v0, s[0:1] ; GFX10-DL-NOXNACK-NEXT: s_endpgm ; GFX10-DL-LABEL: idot8_acc8_vecMul: diff --git a/llvm/test/CodeGen/AMDGPU/idot8u.ll b/llvm/test/CodeGen/AMDGPU/idot8u.ll --- a/llvm/test/CodeGen/AMDGPU/idot8u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8u.ll @@ -2393,32 +2393,32 @@ ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v23, 28, v1 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v12, 16, v6 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) -; GFX10-DL-NEXT: v_add_nc_u16_e64 v3, v6, v3 +; GFX10-DL-NEXT: v_add_nc_u16 v3, v6, v3 ; GFX10-DL-NEXT: v_pk_mul_lo_u16 v9, v9, v10 ; GFX10-DL-NEXT: v_bfe_u32 v1, v1, 20, 4 ; GFX10-DL-NEXT: v_and_b32_e32 v11, v4, v11 ; GFX10-DL-NEXT: v_bfe_u32 v6, v2, 20, 4 ; GFX10-DL-NEXT: v_and_b32_e32 v7, v4, v7 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v3, v3, v12 +; GFX10-DL-NEXT: v_add_nc_u16 v3, v3, v12 ; GFX10-DL-NEXT: v_bfe_u32 v10, v2, 24, 4 ; GFX10-DL-NEXT: v_lshl_or_b32 v1, v1, 16, v11 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v2, 28, v2 ; GFX10-DL-NEXT: v_lshl_or_b32 v6, v6, 16, v7 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v7, 16, v9 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v14, v3, v9 +; GFX10-DL-NEXT: v_add_nc_u16 v14, v3, v9 ; GFX10-DL-NEXT: v_and_b32_e32 v9, v4, v10 ; GFX10-DL-NEXT: v_and_b32_e32 v4, v4, v5 ; GFX10-DL-NEXT: v_pk_mul_lo_u16 v1, v1, v6 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v3, v14, v7 +; GFX10-DL-NEXT: v_add_nc_u16 v3, v14, v7 ; GFX10-DL-NEXT: v_lshl_or_b32 v2, v2, 16, v9 ; GFX10-DL-NEXT: v_lshl_or_b32 v4, v23, 16, v4 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v5, 16, v1 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v3, v3, v1 +; GFX10-DL-NEXT: v_add_nc_u16 v3, v3, v1 ; GFX10-DL-NEXT: v_pk_mul_lo_u16 v2, v4, v2 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v1, v3, v5 +; GFX10-DL-NEXT: v_add_nc_u16 v1, v3, v5 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v1, v1, v2 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v1, v1, v3 +; GFX10-DL-NEXT: v_add_nc_u16 v1, v1, v2 +; GFX10-DL-NEXT: v_add_nc_u16 v1, v1, v3 ; GFX10-DL-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, @@ -2782,49 +2782,49 @@ ; GFX10-DL-NEXT: v_bfe_u32 v13, v2, 8, 4 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v7, 28, v1 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v14, 28, v2 -; GFX10-DL-NEXT: v_mul_lo_u16_e64 v9, v9, v10 +; GFX10-DL-NEXT: v_mul_lo_u16 v9, v9, v10 ; GFX10-DL-NEXT: v_bfe_u32 v5, v1, 16, 4 -; GFX10-DL-NEXT: v_mul_lo_u16_e64 v8, v8, v13 +; GFX10-DL-NEXT: v_mul_lo_u16 v8, v8, v13 ; GFX10-DL-NEXT: v_bfe_u32 v0, v1, 20, 4 ; GFX10-DL-NEXT: v_bfe_u32 v6, v1, 24, 4 ; GFX10-DL-NEXT: v_and_b32_e32 v11, 15, v1 -; GFX10-DL-NEXT: v_lshlrev_b16_e64 v9, 8, v9 +; GFX10-DL-NEXT: v_lshlrev_b16 v9, 8, v9 ; GFX10-DL-NEXT: v_bfe_u32 v1, v1, 4, 4 ; GFX10-DL-NEXT: v_bfe_u32 v15, v2, 4, 4 -; GFX10-DL-NEXT: v_mul_lo_u16_e64 v7, v7, v14 +; GFX10-DL-NEXT: v_mul_lo_u16 v7, v7, v14 ; GFX10-DL-NEXT: v_bfe_u32 v10, v2, 20, 4 ; GFX10-DL-NEXT: v_bfe_u32 v13, v2, 24, 4 ; GFX10-DL-NEXT: v_bfe_u32 v23, v2, 16, 4 ; GFX10-DL-NEXT: v_and_b32_e32 v2, 15, v2 -; GFX10-DL-NEXT: v_mul_lo_u16_e64 v1, v1, v15 +; GFX10-DL-NEXT: v_mul_lo_u16 v1, v1, v15 ; GFX10-DL-NEXT: v_or_b32_e32 v8, v8, v9 -; GFX10-DL-NEXT: v_mul_lo_u16_e64 v9, v0, v10 -; GFX10-DL-NEXT: v_mul_lo_u16_e64 v10, v6, v13 -; GFX10-DL-NEXT: v_lshlrev_b16_e64 v7, 8, v7 -; GFX10-DL-NEXT: v_mul_lo_u16_e64 v2, v11, v2 -; GFX10-DL-NEXT: v_lshlrev_b16_e64 v1, 8, v1 +; GFX10-DL-NEXT: v_mul_lo_u16 v9, v0, v10 +; GFX10-DL-NEXT: v_mul_lo_u16 v10, v6, v13 +; GFX10-DL-NEXT: v_lshlrev_b16 v7, 8, v7 +; GFX10-DL-NEXT: v_mul_lo_u16 v2, v11, v2 +; GFX10-DL-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v0, 16, v8 -; GFX10-DL-NEXT: v_mul_lo_u16_e64 v11, v5, v23 +; GFX10-DL-NEXT: v_mul_lo_u16 v11, v5, v23 ; GFX10-DL-NEXT: v_or_b32_e32 v7, v10, v7 -; GFX10-DL-NEXT: v_lshlrev_b16_e64 v9, 8, v9 +; GFX10-DL-NEXT: v_lshlrev_b16 v9, 8, v9 ; GFX10-DL-NEXT: v_or_b32_sdwa v10, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-DL-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX10-DL-NEXT: v_or_b32_e32 v2, v11, v9 ; GFX10-DL-NEXT: v_lshlrev_b32_e32 v9, 16, v7 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v10, 8, v10 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) -; GFX10-DL-NEXT: v_add_nc_u16_e64 v3, v1, v3 +; GFX10-DL-NEXT: v_add_nc_u16 v3, v1, v3 ; GFX10-DL-NEXT: v_or_b32_sdwa v1, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX10-DL-NEXT: v_add_nc_u16_e64 v9, v3, v10 +; GFX10-DL-NEXT: v_add_nc_u16 v9, v3, v10 ; GFX10-DL-NEXT: v_lshrrev_b64 v[2:3], 24, v[0:1] ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v0, v9, v8 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v0, v0, v2 +; GFX10-DL-NEXT: v_add_nc_u16 v0, v9, v8 +; GFX10-DL-NEXT: v_add_nc_u16 v0, v0, v2 ; GFX10-DL-NEXT: v_mad_u16 v0, v5, v23, v0 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v0, v0, v1 +; GFX10-DL-NEXT: v_add_nc_u16 v0, v0, v1 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v1, 8, v7 ; GFX10-DL-NEXT: v_mad_u16 v0, v6, v13, v0 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v0, v0, v1 +; GFX10-DL-NEXT: v_add_nc_u16 v0, v0, v1 ; GFX10-DL-NEXT: global_store_byte v19, v0, s[0:1] ; GFX10-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, @@ -3121,28 +3121,28 @@ ; GFX10-DL-NEXT: v_mul_u32_u24_e32 v6, v6, v7 ; GFX10-DL-NEXT: v_bfe_u32 v7, v2, 12, 4 ; GFX10-DL-NEXT: s_waitcnt vmcnt(0) -; GFX10-DL-NEXT: v_add_nc_u16_e64 v3, v4, v3 +; GFX10-DL-NEXT: v_add_nc_u16 v3, v4, v3 ; GFX10-DL-NEXT: v_bfe_u32 v4, v1, 12, 4 ; GFX10-DL-NEXT: v_mul_u32_u24_e32 v5, v5, v8 ; GFX10-DL-NEXT: v_bfe_u32 v8, v2, 16, 4 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v3, v3, v6 +; GFX10-DL-NEXT: v_add_nc_u16 v3, v3, v6 ; GFX10-DL-NEXT: v_bfe_u32 v6, v1, 16, 4 ; GFX10-DL-NEXT: v_mul_u32_u24_e32 v4, v4, v7 ; GFX10-DL-NEXT: v_bfe_u32 v7, v2, 20, 4 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v3, v3, v5 +; GFX10-DL-NEXT: v_add_nc_u16 v3, v3, v5 ; GFX10-DL-NEXT: v_bfe_u32 v5, v1, 20, 4 ; GFX10-DL-NEXT: v_mul_u32_u24_e32 v6, v6, v8 ; GFX10-DL-NEXT: v_bfe_u32 v8, v2, 24, 4 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v1, 28, v1 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v3, v3, v4 +; GFX10-DL-NEXT: v_add_nc_u16 v3, v3, v4 ; GFX10-DL-NEXT: v_mul_u32_u24_e32 v5, v5, v7 ; GFX10-DL-NEXT: v_lshrrev_b32_e32 v2, 28, v2 ; GFX10-DL-NEXT: v_mul_u32_u24_e32 v4, v11, v8 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v3, v3, v6 +; GFX10-DL-NEXT: v_add_nc_u16 v3, v3, v6 ; GFX10-DL-NEXT: v_mul_u32_u24_e32 v1, v1, v2 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v3, v3, v5 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v2, v3, v4 -; GFX10-DL-NEXT: v_add_nc_u16_e64 v1, v2, v1 +; GFX10-DL-NEXT: v_add_nc_u16 v3, v3, v5 +; GFX10-DL-NEXT: v_add_nc_u16 v2, v3, v4 +; GFX10-DL-NEXT: v_add_nc_u16 v1, v2, v1 ; GFX10-DL-NEXT: v_and_b32_e32 v1, 15, v1 ; GFX10-DL-NEXT: global_store_byte v0, v1, s[2:3] ; GFX10-DL-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/imm16.ll b/llvm/test/CodeGen/AMDGPU/imm16.ll --- a/llvm/test/CodeGen/AMDGPU/imm16.ll +++ b/llvm/test/CodeGen/AMDGPU/imm16.ll @@ -1115,7 +1115,7 @@ ; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] ; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] ; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] -; GFX10-NEXT: v_add_nc_u16_e64 v0, v0, -1 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x83,0x01,0x00] +; GFX10-NEXT: v_add_nc_u16 v0, v0, -1 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x83,0x01,0x00] ; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; @@ -1176,7 +1176,7 @@ ; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] ; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] ; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] -; GFX10-NEXT: v_add_nc_u16_e64 v0, v0, -2 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x85,0x01,0x00] +; GFX10-NEXT: v_add_nc_u16 v0, v0, -2 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x85,0x01,0x00] ; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; @@ -1237,7 +1237,7 @@ ; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] ; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] ; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] -; GFX10-NEXT: v_add_nc_u16_e64 v0, v0, -16 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0xa1,0x01,0x00] +; GFX10-NEXT: v_add_nc_u16 v0, v0, -16 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0xa1,0x01,0x00] ; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; @@ -1372,7 +1372,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +; GFX10-NEXT: v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] @@ -1407,7 +1407,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff] +; GFX10-NEXT: v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] @@ -1442,7 +1442,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00] +; GFX10-NEXT: v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] @@ -1477,7 +1477,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff] +; GFX10-NEXT: v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] @@ -1512,7 +1512,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_lshlrev_b16_e64 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00] +; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] @@ -1548,7 +1548,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_lshlrev_b16_e64 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff] +; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] @@ -1584,7 +1584,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00] +; GFX10-NEXT: v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] @@ -1619,7 +1619,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff] +; GFX10-NEXT: v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] @@ -1654,7 +1654,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00] +; GFX10-NEXT: v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00] ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll @@ -6,7 +6,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOMADMACF32,GFX103 %s ; GCN-LABEL: {{^}}test_mul_legacy_f32: -; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} +; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} define amdgpu_kernel void @test_mul_legacy_f32(float addrspace(1)* %out, float %a, float %b) #0 { %result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) store float %result, float addrspace(1)* %out, align 4 @@ -14,7 +14,7 @@ } ; GCN-LABEL: {{^}}test_mul_legacy_undef0_f32: -; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} +; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} define amdgpu_kernel void @test_mul_legacy_undef0_f32(float addrspace(1)* %out, float %a) #0 { %result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a) store float %result, float addrspace(1)* %out, align 4 @@ -22,7 +22,7 @@ } ; GCN-LABEL: {{^}}test_mul_legacy_undef1_f32: -; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} +; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} define amdgpu_kernel void @test_mul_legacy_undef1_f32(float addrspace(1)* %out, float %a) #0 { %result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef) store float %result, float addrspace(1)* %out, align 4 @@ -30,7 +30,7 @@ } ; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32: -; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, |s{{[0-9]+}}|, |{{[sv][0-9]+}}| +; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, |s{{[0-9]+}}|, |{{[sv][0-9]+}}| define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, float %a, float %b) #0 { %a.fabs = call float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) @@ -41,7 +41,7 @@ ; Don't form mad/mac instructions because they don't support denormals. ; GCN-LABEL: {{^}}test_add_mul_legacy_f32: -; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} +; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} ; GCN: v_add_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} define amdgpu_kernel void @test_add_mul_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 { %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll @@ -62,12 +62,12 @@ ; GFX10-NEXT: v_mul_hi_u32 v9, v1, v3 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, v3 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2 -; GFX10-NEXT: v_add_co_u32_e64 v10, vcc_lo, v6, v5 +; GFX10-NEXT: v_add_co_u32 v10, vcc_lo, v6, v5 ; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v4, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v3, vcc_lo, v10, v8 +; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v10, v8 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v4, v7, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v9, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v3, vcc_lo, v3, v1 +; GFX10-NEXT: v_add_co_u32 v3, vcc_lo, v3, v1 ; GFX10-NEXT: v_add3_u32 v1, v6, v5, v8 ; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v4, vcc_lo ; GFX10-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[3:4] @@ -164,21 +164,21 @@ ; GFX10-NEXT: v_mul_hi_u32 v7, v1, v2 ; GFX10-NEXT: v_mul_hi_i32 v9, v1, v3 ; GFX10-NEXT: v_mul_lo_u32 v11, v1, v3 -; GFX10-NEXT: v_add_co_u32_e64 v10, vcc_lo, v5, v15 +; GFX10-NEXT: v_add_co_u32 v10, vcc_lo, v5, v15 ; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v6, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v10, vcc_lo, v10, v8 +; GFX10-NEXT: v_add_co_u32 v10, vcc_lo, v10, v8 ; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v6, v7, vcc_lo ; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v9, vcc_lo -; GFX10-NEXT: v_add_co_u32_e64 v11, vcc_lo, v6, v11 +; GFX10-NEXT: v_add_co_u32 v11, vcc_lo, v6, v11 ; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v7, vcc_lo -; GFX10-NEXT: v_sub_co_u32_e64 v9, vcc_lo, v11, v2 +; GFX10-NEXT: v_sub_co_u32 v9, vcc_lo, v11, v2 ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v10, vcc_lo, 0, v7, vcc_lo ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0, v1 ; GFX10-NEXT: v_add3_u32 v1, v5, v15, v8 ; GFX10-NEXT: v_cndmask_b32_e32 v6, v11, v9, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v10, vcc_lo ; GFX10-NEXT: v_ashrrev_i32_e32 v4, 31, v1 -; GFX10-NEXT: v_sub_co_u32_e64 v8, vcc_lo, v6, v0 +; GFX10-NEXT: v_sub_co_u32 v8, vcc_lo, v6, v0 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v2 ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v9, vcc_lo, 0, v7, vcc_lo ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 0, v3 @@ -410,7 +410,7 @@ ; GFX10-NEXT: v_cndmask_b32_e32 v1, s6, v1, vcc_lo ; GFX10-NEXT: s_ashr_i32 s4, s1, 31 ; GFX10-NEXT: s_mov_b32 s5, s4 -; GFX10-NEXT: v_sub_co_u32_e64 v2, vcc_lo, v0, s0 +; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v0, s0 ; GFX10-NEXT: s_mul_i32 s0, s0, s2 ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: v_cmp_lt_i32_e64 vcc_lo, s3, 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll b/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll @@ -3,7 +3,7 @@ ; GCN-LABEL: {{^}}mul_legacy ; GFX908: v_mul_legacy_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} -; GFX90A: v_mul_legacy_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} +; GFX90A: v_mul_legacy_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @mul_legacy( float addrspace(1)* %r, float addrspace(1)* %a, diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll @@ -114,7 +114,7 @@ ; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: v_add_co_u32_e64 v0, s0, s0, v0 +; GFX10-WGP-NEXT: v_add_co_u32 v0, s0, s0, v0 ; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] slc ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 @@ -128,7 +128,7 @@ ; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-CU-NEXT: v_add_co_u32_e64 v0, s0, s0, v0 +; GFX10-CU-NEXT: v_add_co_u32 v0, s0, s0, v0 ; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] slc ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 @@ -302,7 +302,7 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: v_add_co_u32_e64 v0, s0, s2, v0 +; GFX10-WGP-NEXT: v_add_co_u32 v0, s0, s2, v0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[1:2] ; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -316,7 +316,7 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: v_add_co_u32_e64 v0, s0, s2, v0 +; GFX10-CU-NEXT: v_add_co_u32 v0, s0, s2, v0 ; GFX10-CU-NEXT: flat_load_dword v2, v[1:2] ; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll @@ -89,7 +89,7 @@ ; GFX10-WGP-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: v_add_co_u32_e64 v0, s0, s0, v0 +; GFX10-WGP-NEXT: v_add_co_u32 v0, s0, s0, v0 ; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) @@ -104,7 +104,7 @@ ; GFX10-CU-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX10-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-CU-NEXT: v_add_co_u32_e64 v0, s0, s0, v0 +; GFX10-CU-NEXT: v_add_co_u32 v0, s0, s0, v0 ; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) @@ -225,7 +225,7 @@ ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-WGP-NEXT: v_add_co_u32_e64 v0, s0, s2, v0 +; GFX10-WGP-NEXT: v_add_co_u32 v0, s0, s2, v0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[1:2] ; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -240,7 +240,7 @@ ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s0 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1 -; GFX10-CU-NEXT: v_add_co_u32_e64 v0, s0, s2, v0 +; GFX10-CU-NEXT: v_add_co_u32 v0, s0, s2, v0 ; GFX10-CU-NEXT: flat_load_dword v2, v[1:2] ; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/min.ll b/llvm/test/CodeGen/AMDGPU/min.ll --- a/llvm/test/CodeGen/AMDGPU/min.ll +++ b/llvm/test/CodeGen/AMDGPU/min.ll @@ -180,7 +180,7 @@ ; SI: v_min_i32_e32 ; GFX8_9: v_min_i16_e32 -; GFX10: v_min_i16_e64 +; GFX10: v_min_i16 ; EG: MIN_INT define amdgpu_kernel void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 { @@ -354,7 +354,7 @@ ; GFX8_9_10: {{flat|global}}_load_ubyte ; GFX8_9_10: {{flat|global}}_load_ubyte ; GFX8_9: v_min_u16_e32 -; GFX10: v_min_u16_e64 +; GFX10: v_min_u16 ; EG: MIN_UINT define amdgpu_kernel void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll --- a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll +++ b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll @@ -17,7 +17,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, 1 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -39,7 +39,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -61,7 +61,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -85,7 +85,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -109,7 +109,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -133,7 +133,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -157,7 +157,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -179,7 +179,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -203,7 +203,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -227,7 +227,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x3fff, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -251,7 +251,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -275,7 +275,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -299,7 +299,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffc000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -324,7 +324,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -349,7 +349,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -374,7 +374,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -399,7 +399,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -424,7 +424,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -449,7 +449,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -475,7 +475,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -501,7 +501,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -527,7 +527,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -553,7 +553,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -579,7 +579,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -605,7 +605,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll --- a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll +++ b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll @@ -57,7 +57,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -81,7 +81,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -123,7 +123,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -147,7 +147,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -169,7 +169,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -193,7 +193,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -217,7 +217,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x3800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -239,7 +239,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -263,7 +263,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -287,7 +287,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffc000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -312,7 +312,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -337,7 +337,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -362,7 +362,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -387,7 +387,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -412,7 +412,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -437,7 +437,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -463,7 +463,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -489,7 +489,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -515,7 +515,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -541,7 +541,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -567,7 +567,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -593,7 +593,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -748,7 +748,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xfffff000, s0 +; GFX10-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -778,7 +778,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffe000, s0 +; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -883,7 +883,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xfffff000, s0 +; GFX10-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -913,7 +913,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffe000, s0 +; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -943,7 +943,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffc000, s0 +; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -973,7 +973,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0, s0 +; GFX10-NEXT: v_add_co_u32 v0, s0, 0, s0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1003,7 +1003,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0 +; GFX10-NEXT: v_add_co_u32 v0, s0, 0x800, s0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1033,7 +1033,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0 +; GFX10-NEXT: v_add_co_u32 v0, s0, 0x800, s0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1064,7 +1064,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1000, s0 +; GFX10-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1095,7 +1095,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1800, s0 +; GFX10-NEXT: v_add_co_u32 v0, s0, 0x1800, s0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1126,7 +1126,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x2000, s0 +; GFX10-NEXT: v_add_co_u32 v0, s0, 0x2000, s0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1159,7 +1159,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, s0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, s0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1192,7 +1192,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, s0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, s0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1225,7 +1225,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, s0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1258,7 +1258,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, s0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1291,7 +1291,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, s0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -1324,7 +1324,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0 +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, s0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll --- a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll +++ b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s ; GCN-LABEL: {{^}}shl_i16: -; GCN: v_lshlrev_b16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_lshlrev_b16{{[_e32]*}} [[OP:v[0-9]+]], ; GCN-NEXT: s_setpc_b64 define i16 @shl_i16(i16 %x, i16 %y) { %res = shl i16 %x, %y @@ -10,7 +10,7 @@ } ; GCN-LABEL: {{^}}lshr_i16: -; GCN: v_lshrrev_b16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_lshrrev_b16{{[_e32]*}} [[OP:v[0-9]+]], ; GCN-NEXT: s_setpc_b64 define i16 @lshr_i16(i16 %x, i16 %y) { %res = lshr i16 %x, %y @@ -18,7 +18,7 @@ } ; GCN-LABEL: {{^}}ashr_i16: -; GCN: v_ashrrev_i16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_ashrrev_i16{{[_e32]*}} [[OP:v[0-9]+]], ; GCN-NEXT: s_setpc_b64 define i16 @ashr_i16(i16 %x, i16 %y) { %res = ashr i16 %x, %y @@ -26,7 +26,7 @@ } ; GCN-LABEL: {{^}}add_u16: -; GCN: v_add_{{(nc_)*}}u16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_add_{{(nc_)*}}u16{{[_e32]*}} [[OP:v[0-9]+]], ; GCN-NEXT: s_setpc_b64 define i16 @add_u16(i16 %x, i16 %y) { %res = add i16 %x, %y @@ -34,7 +34,7 @@ } ; GCN-LABEL: {{^}}sub_u16: -; GCN: v_sub_{{(nc_)*}}u16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_sub_{{(nc_)*}}u16{{[_e32]*}} [[OP:v[0-9]+]], ; GCN-NEXT: s_setpc_b64 define i16 @sub_u16(i16 %x, i16 %y) { %res = sub i16 %x, %y @@ -42,7 +42,7 @@ } ; GCN-LABEL: {{^}}mul_lo_u16: -; GCN: v_mul_lo_u16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_mul_lo_u16{{[_e32]*}} [[OP:v[0-9]+]], ; GCN-NEXT: s_setpc_b64 define i16 @mul_lo_u16(i16 %x, i16 %y) { %res = mul i16 %x, %y @@ -50,7 +50,7 @@ } ; GCN-LABEL: {{^}}min_u16: -; GCN: v_min_u16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_min_u16{{[_e32]*}} [[OP:v[0-9]+]], ; GCN-NEXT: s_setpc_b64 define i16 @min_u16(i16 %x, i16 %y) { %cmp = icmp ule i16 %x, %y @@ -59,7 +59,7 @@ } ; GCN-LABEL: {{^}}min_i16: -; GCN: v_min_i16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_min_i16{{[_e32]*}} [[OP:v[0-9]+]], ; GCN-NEXT: s_setpc_b64 define i16 @min_i16(i16 %x, i16 %y) { %cmp = icmp sle i16 %x, %y @@ -68,7 +68,7 @@ } ; GCN-LABEL: {{^}}max_u16: -; GCN: v_max_u16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_max_u16{{[_e32]*}} [[OP:v[0-9]+]], ; GCN-NEXT: s_setpc_b64 define i16 @max_u16(i16 %x, i16 %y) { %cmp = icmp uge i16 %x, %y @@ -77,7 +77,7 @@ } ; GCN-LABEL: {{^}}max_i16: -; GCN: v_max_i16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_max_i16{{[_e32]*}} [[OP:v[0-9]+]], ; GCN-NEXT: s_setpc_b64 define i16 @max_i16(i16 %x, i16 %y) { %cmp = icmp sge i16 %x, %y @@ -86,7 +86,7 @@ } ; GCN-LABEL: {{^}}shl_i16_zext_i32: -; GCN: v_lshlrev_b16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_lshlrev_b16{{[_e32]*}} [[OP:v[0-9]+]], ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]] ; GCN-NEXT: s_setpc_b64 define i32 @shl_i16_zext_i32(i16 %x, i16 %y) { @@ -96,7 +96,7 @@ } ; GCN-LABEL: {{^}}lshr_i16_zext_i32: -; GCN: v_lshrrev_b16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_lshrrev_b16{{[_e32]*}} [[OP:v[0-9]+]], ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]] ; GCN-NEXT: s_setpc_b64 define i32 @lshr_i16_zext_i32(i16 %x, i16 %y) { @@ -106,7 +106,7 @@ } ; GCN-LABEL: {{^}}ashr_i16_zext_i32: -; GCN: v_ashrrev_i16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_ashrrev_i16{{[_e32]*}} [[OP:v[0-9]+]], ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]] ; GCN-NEXT: s_setpc_b64 define i32 @ashr_i16_zext_i32(i16 %x, i16 %y) { @@ -116,7 +116,7 @@ } ; GCN-LABEL: {{^}}add_u16_zext_i32: -; GCN: v_add_{{(nc_)*}}u16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_add_{{(nc_)*}}u16{{[_e32]*}} [[OP:v[0-9]+]], ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]] ; GCN-NEXT: s_setpc_b64 define i32 @add_u16_zext_i32(i16 %x, i16 %y) { @@ -126,7 +126,7 @@ } ; GCN-LABEL: {{^}}sub_u16_zext_i32: -; GCN: v_sub_{{(nc_)*}}u16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_sub_{{(nc_)*}}u16{{[_e32]*}} [[OP:v[0-9]+]], ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]] ; GCN-NEXT: s_setpc_b64 define i32 @sub_u16_zext_i32(i16 %x, i16 %y) { @@ -136,7 +136,7 @@ } ; GCN-LABEL: {{^}}mul_lo_u16_zext_i32: -; GCN: v_mul_lo_u16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_mul_lo_u16{{[_e32]*}} [[OP:v[0-9]+]], ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]] ; GCN-NEXT: s_setpc_b64 define i32 @mul_lo_u16_zext_i32(i16 %x, i16 %y) { @@ -146,7 +146,7 @@ } ; GCN-LABEL: {{^}}min_u16_zext_i32: -; GCN: v_min_u16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_min_u16{{[_e32]*}} [[OP:v[0-9]+]], ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]] ; GCN-NEXT: s_setpc_b64 define i32 @min_u16_zext_i32(i16 %x, i16 %y) { @@ -157,7 +157,7 @@ } ; GCN-LABEL: {{^}}min_i16_zext_i32: -; GCN: v_min_i16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_min_i16{{[_e32]*}} [[OP:v[0-9]+]], ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]] ; GCN-NEXT: s_setpc_b64 define i32 @min_i16_zext_i32(i16 %x, i16 %y) { @@ -168,7 +168,7 @@ } ; GCN-LABEL: {{^}}max_u16_zext_i32: -; GCN: v_max_u16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_max_u16{{[_e32]*}} [[OP:v[0-9]+]], ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]] ; GCN-NEXT: s_setpc_b64 define i32 @max_u16_zext_i32(i16 %x, i16 %y) { @@ -179,7 +179,7 @@ } ; GCN-LABEL: {{^}}max_i16_zext_i32: -; GCN: v_max_i16_e{{32|64}} [[OP:v[0-9]+]], +; GCN: v_max_i16{{[_e32]*}} [[OP:v[0-9]+]], ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]] ; GCN-NEXT: s_setpc_b64 define i32 @max_i16_zext_i32(i16 %x, i16 %y) { diff --git a/llvm/test/CodeGen/AMDGPU/saddo.ll b/llvm/test/CodeGen/AMDGPU/saddo.ll --- a/llvm/test/CodeGen/AMDGPU/saddo.ll +++ b/llvm/test/CodeGen/AMDGPU/saddo.ll @@ -89,7 +89,7 @@ ; GFX10-NEXT: v_cmp_lt_i64_e64 s3, s[0:1], s[6:7] ; GFX10-NEXT: s_xor_b32 s2, s2, s3 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 -; GFX10-NEXT: v_add_co_u32_e64 v0, s0, s0, v0 +; GFX10-NEXT: v_add_co_u32 v0, s0, s0, v0 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX10-NEXT: s_endpgm @@ -461,7 +461,7 @@ ; GFX10-NEXT: global_load_dwordx2 v[9:10], v6, s[8:9] ; GFX10-NEXT: global_load_dwordx2 v[2:3], v6, s[10:11] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_co_u32_e64 v7, vcc_lo, v9, v2 +; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v9, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v10, v3, vcc_lo ; GFX10-NEXT: v_cmp_gt_i64_e32 vcc_lo, 0, v[2:3] ; GFX10-NEXT: v_cmp_lt_i64_e64 s0, v[7:8], v[9:10] diff --git a/llvm/test/CodeGen/AMDGPU/saddsat.ll b/llvm/test/CodeGen/AMDGPU/saddsat.ll --- a/llvm/test/CodeGen/AMDGPU/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/saddsat.ll @@ -36,10 +36,10 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, 8, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 8, v0 +; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 8, v0 ; GFX10-NEXT: v_add_nc_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_ashrrev_i16_e64 v0, 8, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, 8, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.sadd.sat.i8(i8 %lhs, i8 %rhs) ret i8 %result @@ -486,7 +486,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_co_u32_e64 v10, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_u32 v10, vcc_lo, v0, v2 ; GFX10-NEXT: v_bfrev_b32_e32 v6, -2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_gt_i64_e64 s4, 0, v[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll --- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll +++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll @@ -61,7 +61,7 @@ ; NOSDWA: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; NOSDWA-NOT: v_mul_u32_u24_sdwa ; GFX89: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX10: v_mul_lo_u16_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX10: v_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; SDWA-NOT: v_mul_u32_u24_sdwa define amdgpu_kernel void @mul_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %ina, i16 addrspace(1)* %inb) #0 { @@ -268,7 +268,7 @@ ; NOSDWA: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; NOSDWA-NOT: v_mul_u32_u24_sdwa ; GFX89: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX10: v_mul_lo_u16_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; GFX10: v_mul_lo_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; SDWA-NOT: v_mul_u32_u24_sdwa define amdgpu_kernel void @mul_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %ina, i8 addrspace(1)* %inb) #0 { @@ -296,12 +296,12 @@ ; GFX9-DAG: v_mul_lo_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1 ; GFX9-DAG: v_mul_lo_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GFX10-DAG: v_mul_lo_u16_e64 -; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16 +; GFX10-DAG: v_mul_lo_u16 ; GFX9: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX10: v_lshlrev_b16_e64 v{{[0-9]+}}, 8, v +; GFX10: v_lshlrev_b16 v{{[0-9]+}}, 8, v ; GFX10: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD define amdgpu_kernel void @mul_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %ina, <2 x i8> addrspace(1)* %inb) #0 { entry: @@ -331,10 +331,10 @@ ; GFX9-DAG: v_mul_lo_u16_sdwa ; GFX9-DAG: v_mul_lo_u16_sdwa -; GFX10-DAG: v_mul_lo_u16_e64 -; GFX10-DAG: v_mul_lo_u16_e64 -; GFX10-DAG: v_mul_lo_u16_e64 -; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16 +; GFX10-DAG: v_mul_lo_u16 +; GFX10-DAG: v_mul_lo_u16 +; GFX10-DAG: v_mul_lo_u16 define amdgpu_kernel void @mul_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %ina, <4 x i8> addrspace(1)* %inb) #0 { entry: @@ -370,14 +370,14 @@ ; GFX9-DAG: v_mul_lo_u16_sdwa ; GFX9-DAG: v_mul_lo_u16_sdwa -; GFX10-DAG: v_mul_lo_u16_e64 -; GFX10-DAG: v_mul_lo_u16_e64 -; GFX10-DAG: v_mul_lo_u16_e64 -; GFX10-DAG: v_mul_lo_u16_e64 -; GFX10-DAG: v_mul_lo_u16_e64 -; GFX10-DAG: v_mul_lo_u16_e64 -; GFX10-DAG: v_mul_lo_u16_e64 -; GFX10-DAG: v_mul_lo_u16_e64 +; GFX10-DAG: v_mul_lo_u16 +; GFX10-DAG: v_mul_lo_u16 +; GFX10-DAG: v_mul_lo_u16 +; GFX10-DAG: v_mul_lo_u16 +; GFX10-DAG: v_mul_lo_u16 +; GFX10-DAG: v_mul_lo_u16 +; GFX10-DAG: v_mul_lo_u16 +; GFX10-DAG: v_mul_lo_u16 define amdgpu_kernel void @mul_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %ina, <8 x i8> addrspace(1)* %inb) #0 { entry: diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll --- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll +++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll @@ -3,7 +3,7 @@ ; GCN-LABEL: {{^}}shl_base_atomicrmw_global_atomic_csub_ptr: ; GCN-DAG: v_lshlrev_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, 2, v[4:5] ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 43 -; GCN: v_add_co_u32_e64 v[[EXTRA_LO:[0-9]+]], vcc_lo, 0x80, v4 +; GCN: v_add_co_u32 v[[EXTRA_LO:[0-9]+]], vcc_lo, 0x80, v4 ; GCN: v_add_co_ci_u32_e32 v[[EXTRA_HI:[0-9]+]], vcc_lo, 0, v5, vcc_lo ; GCN: global_atomic_csub v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]], off offset:512 glc ; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[EXTRA_LO]]:[[EXTRA_HI]]{{\]}} diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll --- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll @@ -716,7 +716,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_ushort v1, v0, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_sub_nc_u16_e64 v1, v1, 64 +; GFX10-NEXT: v_sub_nc_u16 v1, v1, 64 ; GFX10-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -786,7 +786,7 @@ ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_ushort v1, v1, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_sub_nc_u16_e64 v1, v1, 64 +; GFX10-NEXT: v_sub_nc_u16 v1, v1, 64 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm @@ -873,8 +873,8 @@ ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_ushort v2, v0, s[2:3] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_sub_nc_u16_e64 v1, v1, 64 -; GFX10-NEXT: v_sub_nc_u16_e64 v2, v2, 64 +; GFX10-NEXT: v_sub_nc_u16 v1, v1, 64 +; GFX10-NEXT: v_sub_nc_u16 v2, v2, 64 ; GFX10-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_store_short v0, v2, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/ssubsat.ll --- a/llvm/test/CodeGen/AMDGPU/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/ssubsat.ll @@ -36,10 +36,10 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b16_e64 v1, 8, v1 -; GFX10-NEXT: v_lshlrev_b16_e64 v0, 8, v0 +; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, 8, v0 ; GFX10-NEXT: v_sub_nc_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_ashrrev_i16_e64 v0, 8, v0 +; GFX10-NEXT: v_ashrrev_i16 v0, 8, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.ssub.sat.i8(i8 %lhs, i8 %rhs) ret i8 %result @@ -1100,7 +1100,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_sub_co_u32_e64 v10, vcc_lo, v0, v2 +; GFX10-NEXT: v_sub_co_u32 v10, vcc_lo, v0, v2 ; GFX10-NEXT: v_bfrev_b32_e32 v6, -2 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v11, vcc_lo, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll --- a/llvm/test/CodeGen/AMDGPU/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll @@ -34,7 +34,7 @@ ; GFX10-NEXT: s_movk_i32 s4, 0xff ; GFX10-NEXT: v_and_b32_e32 v1, s4, v1 ; GFX10-NEXT: v_and_b32_e32 v0, s4, v0 -; GFX10-NEXT: v_sub_nc_u16_e64 v0, v0, v1 clamp +; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.usub.sat.i8(i8 %lhs, i8 %rhs) ret i8 %result @@ -67,7 +67,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_sub_nc_u16_e64 v0, v0, v1 clamp +; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs) ret i16 %result @@ -577,7 +577,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_sub_co_u32_e64 v2, vcc_lo, v0, v2 +; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 ; GFX10-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo ; GFX10-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1] ; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll --- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll @@ -9,7 +9,7 @@ ; GCN-NEXT: BB0_1: ; %bb0 ; GCN-NEXT: ; =>This Loop Header: Depth=1 ; GCN-NEXT: ; Child Loop BB0_2 Depth 2 -; GCN-NEXT: v_add_co_u32_e64 v6, vcc_lo, v0, 8 +; GCN-NEXT: v_add_co_u32 v6, vcc_lo, v0, 8 ; GCN-NEXT: s_mov_b32 s5, exec_lo ; GCN-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v1, vcc_lo ; GCN-NEXT: s_clause 0x1 diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -285,9 +285,9 @@ } ; GCN-LABEL: {{^}}test_addc_vop2b: -; GFX1032: v_add_co_u32_e64 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, s{{[0-9]+}} +; GFX1032: v_add_co_u32 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, s{{[0-9]+}} ; GFX1032: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, s{{[0-9]+}}, v{{[0-9]+}}, vcc_lo -; GFX1064: v_add_co_u32_e64 v{{[0-9]+}}, vcc, v{{[0-9]+}}, s{{[0-9]+}} +; GFX1064: v_add_co_u32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, s{{[0-9]+}} ; GFX1064: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}, vcc{{$}} define amdgpu_kernel void @test_addc_vop2b(i64 addrspace(1)* %arg, i64 %arg1) #0 { bb: @@ -300,9 +300,9 @@ } ; GCN-LABEL: {{^}}test_subbrev_vop2b: -; GFX1032: v_sub_co_u32_e64 v{{[0-9]+}}, [[A0:s[0-9]+|vcc_lo]], v{{[0-9]+}}, s{{[0-9]+}}{{$}} +; GFX1032: v_sub_co_u32 v{{[0-9]+}}, [[A0:s[0-9]+|vcc_lo]], v{{[0-9]+}}, s{{[0-9]+}}{{$}} ; GFX1032: v_subrev_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[A0]]{{$}} -; GFX1064: v_sub_co_u32_e64 v{{[0-9]+}}, [[A0:s\[[0-9:]+\]|vcc]], v{{[0-9]+}}, s{{[0-9]+}}{{$}} +; GFX1064: v_sub_co_u32 v{{[0-9]+}}, [[A0:s\[[0-9:]+\]|vcc]], v{{[0-9]+}}, s{{[0-9]+}}{{$}} ; GFX1064: v_subrev_co_ci_u32_e32 v{{[0-9]+}}, vcc, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[A0]]{{$}} define amdgpu_kernel void @test_subbrev_vop2b(i64 addrspace(1)* %arg, i64 %arg1) #0 { bb: @@ -315,9 +315,9 @@ } ; GCN-LABEL: {{^}}test_subb_vop2b: -; GFX1032: v_sub_co_u32_e64 v{{[0-9]+}}, [[A0:s[0-9]+|vcc_lo]], s{{[0-9]+}}, v{{[0-9]+}}{{$}} +; GFX1032: v_sub_co_u32 v{{[0-9]+}}, [[A0:s[0-9]+|vcc_lo]], s{{[0-9]+}}, v{{[0-9]+}}{{$}} ; GFX1032: v_sub_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, {{[vs][0-9]+}}, v{{[0-9]+}}, [[A0]]{{$}} -; GFX1064: v_sub_co_u32_e64 v{{[0-9]+}}, [[A0:s\[[0-9:]+\]|vcc]], s{{[0-9]+}}, v{{[0-9]+}}{{$}} +; GFX1064: v_sub_co_u32 v{{[0-9]+}}, [[A0:s\[[0-9:]+\]|vcc]], s{{[0-9]+}}, v{{[0-9]+}}{{$}} ; GFX1064: v_sub_co_ci_u32_e32 v{{[0-9]+}}, vcc, {{[vs][0-9]+}}, v{{[0-9]+}}, [[A0]]{{$}} define amdgpu_kernel void @test_subb_vop2b(i64 addrspace(1)* %arg, i64 %arg1) #0 { bb: @@ -330,24 +330,24 @@ } ; GCN-LABEL: {{^}}test_udiv64: -; GFX1032: v_add_co_u32_e64 v{{[0-9]+}}, [[SDST:s[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} +; GFX1032: v_add_co_u32 v{{[0-9]+}}, [[SDST:s[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} ; GFX1032: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, 0, v{{[0-9]+}}, vcc_lo ; GFX1032: v_add_co_ci_u32_e64 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, v{{[0-9]+}}, [[SDST]] -; GFX1032: v_add_co_u32_e64 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, v{{[0-9]+}} -; GFX1032: v_add_co_u32_e64 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, v{{[0-9]+}} -; GFX1032: v_add_co_u32_e64 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, v{{[0-9]+}} +; GFX1032: v_add_co_u32 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, v{{[0-9]+}} +; GFX1032: v_add_co_u32 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, v{{[0-9]+}} +; GFX1032: v_add_co_u32 v{{[0-9]+}}, vcc_lo, v{{[0-9]+}}, v{{[0-9]+}} ; GFX1032: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, 0, v{{[0-9]+}}, vcc_lo -; GFX1032: v_sub_co_u32_e64 v{{[0-9]+}}, vcc_lo, s{{[0-9]+}}, v{{[0-9]+}} +; GFX1032: v_sub_co_u32 v{{[0-9]+}}, vcc_lo, s{{[0-9]+}}, v{{[0-9]+}} ; GFX1032: v_subrev_co_ci_u32_e64 v{{[0-9]+}}, s{{[0-9]+}}, {{[vs][0-9]+}}, v{{[0-9]+}}, vcc_lo ; GFX1032: v_sub_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, {{[vs][0-9]+}}, v{{[0-9]+}}, vcc_lo -; GFX1064: v_add_co_u32_e64 v{{[0-9]+}}, [[SDST:s\[[0-9:]+\]]], v{{[0-9]+}}, v{{[0-9]+}} +; GFX1064: v_add_co_u32 v{{[0-9]+}}, [[SDST:s\[[0-9:]+\]]], v{{[0-9]+}}, v{{[0-9]+}} ; GFX1064: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc{{$}} ; GFX1064: v_add_co_ci_u32_e64 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[SDST]] -; GFX1064: v_add_co_u32_e64 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} -; GFX1064: v_add_co_u32_e64 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} -; GFX1064: v_add_co_u32_e64 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GFX1064: v_add_co_u32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GFX1064: v_add_co_u32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GFX1064: v_add_co_u32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} ; GFX1064: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc{{$}} -; GFX1064: v_sub_co_u32_e64 v{{[0-9]+}}, s[{{[0-9:]+}}], s{{[0-9]+}}, v{{[0-9]+}} +; GFX1064: v_sub_co_u32 v{{[0-9]+}}, s[{{[0-9:]+}}], s{{[0-9]+}}, v{{[0-9]+}} ; GFX1064: v_subrev_co_ci_u32_e64 v{{[0-9]+}}, vcc, {{[vs][0-9]+}}, v{{[0-9]+}}, s[{{[0-9:]+}}] ; GFX1064: v_sub_co_ci_u32_e64 v{{[0-9]+}}, s[{{[0-9:]+}}], {{[vs][0-9]+}}, v{{[0-9]+}}, s[{{[0-9:]+}}] define amdgpu_kernel void @test_udiv64(i64 addrspace(1)* %arg) #0 { diff --git a/llvm/test/MC/AMDGPU/gfx90a_asm_features.s b/llvm/test/MC/AMDGPU/gfx90a_asm_features.s --- a/llvm/test/MC/AMDGPU/gfx90a_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx90a_asm_features.s @@ -851,112 +851,112 @@ // GFX90A: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] div:2 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x18] v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] div:2 -// GFX90A: v_mul_legacy_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x00] v_mul_legacy_f32_e64 v5, v1, v2 -// GFX90A: v_mul_legacy_f32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0xa1,0xd2,0x01,0x05,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v255, v1, v2 ; encoding: [0xff,0x00,0xa1,0xd2,0x01,0x05,0x02,0x00] v_mul_legacy_f32_e64 v255, v1, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, v255, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xff,0x05,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, v255, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xff,0x05,0x02,0x00] v_mul_legacy_f32_e64 v5, v255, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, s1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x04,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, s1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x04,0x02,0x00] v_mul_legacy_f32_e64 v5, s1, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, s101, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x65,0x04,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, s101, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x65,0x04,0x02,0x00] v_mul_legacy_f32_e64 v5, s101, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x6a,0x04,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x6a,0x04,0x02,0x00] v_mul_legacy_f32_e64 v5, vcc_lo, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x6b,0x04,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x6b,0x04,0x02,0x00] v_mul_legacy_f32_e64 v5, vcc_hi, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, m0, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x7c,0x04,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, m0, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x7c,0x04,0x02,0x00] v_mul_legacy_f32_e64 v5, m0, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x7e,0x04,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x7e,0x04,0x02,0x00] v_mul_legacy_f32_e64 v5, exec_lo, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x7f,0x04,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x7f,0x04,0x02,0x00] v_mul_legacy_f32_e64 v5, exec_hi, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, 0, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x80,0x04,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, 0, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x80,0x04,0x02,0x00] v_mul_legacy_f32_e64 v5, 0, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, -1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xc1,0x04,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, -1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xc1,0x04,0x02,0x00] v_mul_legacy_f32_e64 v5, -1, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, 0.5, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xf0,0x04,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, 0.5, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xf0,0x04,0x02,0x00] v_mul_legacy_f32_e64 v5, 0.5, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, -4.0, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xf7,0x04,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, -4.0, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xf7,0x04,0x02,0x00] v_mul_legacy_f32_e64 v5, -4.0, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, v1, v255 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xff,0x03,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, v255 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xff,0x03,0x00] v_mul_legacy_f32_e64 v5, v1, v255 -// GFX90A: v_mul_legacy_f32_e64 v5, v1, s2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x00,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, s2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x00,0x00] v_mul_legacy_f32_e64 v5, v1, s2 -// GFX90A: v_mul_legacy_f32_e64 v5, v1, s101 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xcb,0x00,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, s101 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xcb,0x00,0x00] v_mul_legacy_f32_e64 v5, v1, s101 -// GFX90A: v_mul_legacy_f32_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xd5,0x00,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xd5,0x00,0x00] v_mul_legacy_f32_e64 v5, v1, vcc_lo -// GFX90A: v_mul_legacy_f32_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xd7,0x00,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xd7,0x00,0x00] v_mul_legacy_f32_e64 v5, v1, vcc_hi -// GFX90A: v_mul_legacy_f32_e64 v5, v1, m0 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xf9,0x00,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, m0 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xf9,0x00,0x00] v_mul_legacy_f32_e64 v5, v1, m0 -// GFX90A: v_mul_legacy_f32_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xfd,0x00,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, exec_lo ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xfd,0x00,0x00] v_mul_legacy_f32_e64 v5, v1, exec_lo -// GFX90A: v_mul_legacy_f32_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xff,0x00,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, exec_hi ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xff,0x00,0x00] v_mul_legacy_f32_e64 v5, v1, exec_hi -// GFX90A: v_mul_legacy_f32_e64 v5, v1, 0 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x01,0x01,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, 0 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x01,0x01,0x00] v_mul_legacy_f32_e64 v5, v1, 0 -// GFX90A: v_mul_legacy_f32_e64 v5, v1, -1 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x83,0x01,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, -1 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x83,0x01,0x00] v_mul_legacy_f32_e64 v5, v1, -1 -// GFX90A: v_mul_legacy_f32_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xe1,0x01,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, 0.5 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xe1,0x01,0x00] v_mul_legacy_f32_e64 v5, v1, 0.5 -// GFX90A: v_mul_legacy_f32_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xef,0x01,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, -4.0 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xef,0x01,0x00] v_mul_legacy_f32_e64 v5, v1, -4.0 -// GFX90A: v_mul_legacy_f32_e64 v5, -v1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x20] +// GFX90A: v_mul_legacy_f32 v5, -v1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x20] v_mul_legacy_f32_e64 v5, -v1, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, v1, -v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x40] +// GFX90A: v_mul_legacy_f32 v5, v1, -v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x40] v_mul_legacy_f32_e64 v5, v1, -v2 -// GFX90A: v_mul_legacy_f32_e64 v5, -v1, -v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x60] +// GFX90A: v_mul_legacy_f32 v5, -v1, -v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x60] v_mul_legacy_f32_e64 v5, -v1, -v2 -// GFX90A: v_mul_legacy_f32_e64 v5, |v1|, v2 ; encoding: [0x05,0x01,0xa1,0xd2,0x01,0x05,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, |v1|, v2 ; encoding: [0x05,0x01,0xa1,0xd2,0x01,0x05,0x02,0x00] v_mul_legacy_f32_e64 v5, |v1|, v2 -// GFX90A: v_mul_legacy_f32_e64 v5, v1, |v2| ; encoding: [0x05,0x02,0xa1,0xd2,0x01,0x05,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, |v2| ; encoding: [0x05,0x02,0xa1,0xd2,0x01,0x05,0x02,0x00] v_mul_legacy_f32_e64 v5, v1, |v2| -// GFX90A: v_mul_legacy_f32_e64 v5, |v1|, |v2| ; encoding: [0x05,0x03,0xa1,0xd2,0x01,0x05,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, |v1|, |v2| ; encoding: [0x05,0x03,0xa1,0xd2,0x01,0x05,0x02,0x00] v_mul_legacy_f32_e64 v5, |v1|, |v2| -// GFX90A: v_mul_legacy_f32_e64 v5, v1, v2 clamp ; encoding: [0x05,0x80,0xa1,0xd2,0x01,0x05,0x02,0x00] +// GFX90A: v_mul_legacy_f32 v5, v1, v2 clamp ; encoding: [0x05,0x80,0xa1,0xd2,0x01,0x05,0x02,0x00] v_mul_legacy_f32_e64 v5, v1, v2 clamp -// GFX90A: v_mul_legacy_f32_e64 v5, v1, v2 mul:2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x08] +// GFX90A: v_mul_legacy_f32 v5, v1, v2 mul:2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x08] v_mul_legacy_f32_e64 v5, v1, v2 mul:2 -// GFX90A: v_mul_legacy_f32_e64 v5, v1, v2 mul:4 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x10] +// GFX90A: v_mul_legacy_f32 v5, v1, v2 mul:4 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x10] v_mul_legacy_f32_e64 v5, v1, v2 mul:4 -// GFX90A: v_mul_legacy_f32_e64 v5, v1, v2 div:2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x18] +// GFX90A: v_mul_legacy_f32 v5, v1, v2 div:2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x18] v_mul_legacy_f32_e64 v5, v1, v2 div:2 // GFX90A: v_xor_b32_dpp v6, v29, v27 row_newbcast:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x36,0x0c,0x2a,0x1d,0x50,0x01,0xff] diff --git a/llvm/test/MC/AMDGPU/literalv216.s b/llvm/test/MC/AMDGPU/literalv216.s --- a/llvm/test/MC/AMDGPU/literalv216.s +++ b/llvm/test/MC/AMDGPU/literalv216.s @@ -283,4 +283,4 @@ // FIXME: v_pk_fmac_f16 cannot be promoted to VOP3 so '_e32' suffix is not valid v_pk_fmac_f16 v5, 0x12345678, v2 // NOGFX9: error: instruction not supported on this GPU -// GFX10: v_pk_fmac_f16_e32 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12] +// GFX10: v_pk_fmac_f16 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12] diff --git a/llvm/test/MC/AMDGPU/vop3-literal.s b/llvm/test/MC/AMDGPU/vop3-literal.s --- a/llvm/test/MC/AMDGPU/vop3-literal.s +++ b/llvm/test/MC/AMDGPU/vop3-literal.s @@ -52,15 +52,15 @@ // GFX10-ERR: error: invalid operand (violates constant bus restrictions) v_bfm_b32_e64 v0, 0x3039, s1 -// GFX10: v_bfm_b32_e64 v0, 0x3039, s1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x00,0x00,0x39,0x30,0x00,0x00] +// GFX10: v_bfm_b32 v0, 0x3039, s1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x00,0x00,0x39,0x30,0x00,0x00] // GFX9-ERR: error: literal operands are not supported v_bfm_b32_e64 v0, 0x3039, v1 -// GFX10: v_bfm_b32_e64 v0, 0x3039, v1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x02,0x00,0x39,0x30,0x00,0x00] +// GFX10: v_bfm_b32 v0, 0x3039, v1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x02,0x00,0x39,0x30,0x00,0x00] // GFX9-ERR: error: literal operands are not supported v_bfm_b32_e64 v0, 0x3039, 0x3039 -// GFX10: v_bfm_b32_e64 v0, 0x3039, 0x3039 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0xfe,0x01,0x00,0x39,0x30,0x00,0x00] +// GFX10: v_bfm_b32 v0, 0x3039, 0x3039 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0xfe,0x01,0x00,0x39,0x30,0x00,0x00] // GFX9-ERR: error: literal operands are not supported v_bfm_b32_e64 v0, 0x3039, 0x3038 @@ -160,15 +160,15 @@ // GFX9-ERR: error: literal operands are not supported v_max_i16_e64 v5, 0xfe0b, v2 -// GFX10: v_max_i16_e64 v5, 0xfe0b, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0xff,0x04,0x02,0x00,0x0b,0xfe,0x00,0x00] +// GFX10: v_max_i16 v5, 0xfe0b, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0xff,0x04,0x02,0x00,0x0b,0xfe,0x00,0x00] // GFX9-ERR: error: literal operands are not supported v_max_i16_e64 v5, v1, 0x123 -// GFX10: v_max_i16_e64 v5, v1, 0x123 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xff,0x01,0x00,0x23,0x01,0x00,0x00] +// GFX10: v_max_i16 v5, v1, 0x123 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xff,0x01,0x00,0x23,0x01,0x00,0x00] // GFX9-ERR: error: literal operands are not supported v_max_i16_e64 v5, 0x1234, 0x1234 -// GFX10: v_max_i16_e64 v5, 0x1234, 0x1234 ; encoding: [0x05,0x00,0x0a,0xd7,0xff,0xfe,0x01,0x00,0x34,0x12,0x00,0x00] +// GFX10: v_max_i16 v5, 0x1234, 0x1234 ; encoding: [0x05,0x00,0x0a,0xd7,0xff,0xfe,0x01,0x00,0x34,0x12,0x00,0x00] // GFX9-ERR: error: literal operands are not supported v_min3_i16 v5, 0xfe0b, v2, v3 @@ -196,19 +196,19 @@ // GFX9-ERR: error: literal operands are not supported v_add_nc_u16 v5, 0xfe0b, v2 -// GFX10: v_add_nc_u16_e64 v5, 0xfe0b, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0x04,0x02,0x00,0x0b,0xfe,0x00,0x00] +// GFX10: v_add_nc_u16 v5, 0xfe0b, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0x04,0x02,0x00,0x0b,0xfe,0x00,0x00] // GFX9-ERR: error: instruction not supported on this GPU v_add_nc_u16 v5, v1, 0x1234 -// GFX10: v_add_nc_u16_e64 v5, v1, 0x1234 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x01,0x00,0x34,0x12,0x00,0x00] +// GFX10: v_add_nc_u16 v5, v1, 0x1234 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x01,0x00,0x34,0x12,0x00,0x00] // GFX9-ERR: error: instruction not supported on this GPU v_add_nc_u16 v5, 0x1234, 0x1234 -// GFX10: v_add_nc_u16_e64 v5, 0x1234, 0x1234 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfe,0x01,0x00,0x34,0x12,0x00,0x00] +// GFX10: v_add_nc_u16 v5, 0x1234, 0x1234 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0xfe,0x01,0x00,0x34,0x12,0x00,0x00] // GFX9-ERR: error: instruction not supported on this GPU v_ashrrev_i16_e64 v5, 0x3456, v2 -// GFX10: v_ashrrev_i16_e64 v5, 0x3456, v2 ; encoding: [0x05,0x00,0x08,0xd7,0xff,0x04,0x02,0x00,0x56,0x34,0x00,0x00] +// GFX10: v_ashrrev_i16 v5, 0x3456, v2 ; encoding: [0x05,0x00,0x08,0xd7,0xff,0x04,0x02,0x00,0x56,0x34,0x00,0x00] // GFX9-ERR: error: literal operands are not supported v_mad_u16 v5, 0xfe0b, v2, v3 diff --git a/llvm/test/MC/AMDGPU/wave32.s b/llvm/test/MC/AMDGPU/wave32.s --- a/llvm/test/MC/AMDGPU/wave32.s +++ b/llvm/test/MC/AMDGPU/wave32.s @@ -296,11 +296,11 @@ // GFX1064: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00] v_add_co_u32 v0, s0, v0, v2 -// GFX1032: v_add_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] +// GFX1032: v_add_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] // GFX1064-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction v_add_co_u32_e64 v0, s0, v0, v2 -// GFX1032: v_add_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] +// GFX1032: v_add_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] // GFX1064-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction v_add_co_ci_u32_e64 v4, s0, v1, v5, s2 @@ -308,11 +308,11 @@ // GFX1064-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction v_sub_co_u32 v0, s0, v0, v2 -// GFX1032: v_sub_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] +// GFX1032: v_sub_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] // GFX1064-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction v_sub_co_u32_e64 v0, s0, v0, v2 -// GFX1032: v_sub_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] +// GFX1032: v_sub_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] // GFX1064-ERR: :[[@LINE-2]]:30: error: invalid operand for instruction v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2 @@ -320,11 +320,11 @@ // GFX1064-ERR: :[[@LINE-2]]:25: error: invalid operand for instruction v_subrev_co_u32 v0, s0, v0, v2 -// GFX1032: v_subrev_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] +// GFX1032: v_subrev_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] // GFX1064-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction v_subrev_co_u32_e64 v0, s0, v0, v2 -// GFX1032: v_subrev_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] +// GFX1032: v_subrev_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] // GFX1064-ERR: :[[@LINE-2]]:33: error: invalid operand for instruction v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2 @@ -333,11 +333,11 @@ v_add_co_u32 v0, s[0:1], v0, v2 // GFX1032-ERR: :[[@LINE-1]]:18: error: invalid operand for instruction -// GFX1064: v_add_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] +// GFX1064: v_add_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] v_add_co_u32_e64 v0, s[0:1], v0, v2 // GFX1032-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction -// GFX1064: v_add_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] +// GFX1064: v_add_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] // GFX1032-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction @@ -345,11 +345,11 @@ v_sub_co_u32 v0, s[0:1], v0, v2 // GFX1032-ERR: :[[@LINE-1]]:18: error: invalid operand for instruction -// GFX1064: v_sub_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] +// GFX1064: v_sub_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] v_sub_co_u32_e64 v0, s[0:1], v0, v2 // GFX1032-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction -// GFX1064: v_sub_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] +// GFX1064: v_sub_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] // GFX1032-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction @@ -357,11 +357,11 @@ v_subrev_co_u32 v0, s[0:1], v0, v2 // GFX1032-ERR: :[[@LINE-1]]:21: error: invalid operand for instruction -// GFX1064: v_subrev_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] +// GFX1064: v_subrev_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] v_subrev_co_u32_e64 v0, s[0:1], v0, v2 // GFX1032-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction -// GFX1064: v_subrev_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] +// GFX1064: v_subrev_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] // GFX1032-ERR: :[[@LINE-1]]:28: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/wave_any.s b/llvm/test/MC/AMDGPU/wave_any.s --- a/llvm/test/MC/AMDGPU/wave_any.s +++ b/llvm/test/MC/AMDGPU/wave_any.s @@ -127,61 +127,61 @@ // GFX10: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00] v_add_co_u32 v0, s0, v0, v2 -// GFX10: v_add_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_add_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] v_add_co_u32_e64 v0, s0, v0, v2 -// GFX10: v_add_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_add_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] v_add_co_ci_u32_e64 v4, s0, v1, v5, s2 // GFX10: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00] v_sub_co_u32 v0, s0, v0, v2 -// GFX10: v_sub_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_sub_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] v_sub_co_u32_e64 v0, s0, v0, v2 -// GFX10: v_sub_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_sub_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2 // GFX10: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00] v_subrev_co_u32 v0, s0, v0, v2 -// GFX10: v_subrev_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_subrev_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] v_subrev_co_u32_e64 v0, s0, v0, v2 -// GFX10: v_subrev_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_subrev_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2 // GFX10: v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00] v_add_co_u32 v0, s[0:1], v0, v2 -// GFX10: v_add_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_add_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] v_add_co_u32 v0, exec, v0, v2 -// GFX10: v_add_co_u32_e64 v0, exec, v0, v2 ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_add_co_u32 v0, exec, v0, v2 ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00] v_add_co_u32 v0, exec_lo, v0, v2 -// GFX10: v_add_co_u32_e64 v0, exec_lo, v0, v2 ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_add_co_u32 v0, exec_lo, v0, v2 ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00] v_add_co_u32_e64 v0, s[0:1], v0, v2 -// GFX10: v_add_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_add_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00] v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] // GFX10: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00] v_sub_co_u32 v0, s[0:1], v0, v2 -// GFX10: v_sub_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] v_sub_co_u32_e64 v0, s[0:1], v0, v2 -// GFX10: v_sub_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00] v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] // GFX10: v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00] v_subrev_co_u32 v0, s[0:1], v0, v2 -// GFX10: v_subrev_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] v_subrev_co_u32_e64 v0, s[0:1], v0, v2 -// GFX10: v_subrev_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] +// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00] v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] // GFX10: v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt @@ -20086,116 +20086,116 @@ # W64: v_add_co_ci_u32_sdwa v5, vcc, v255, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x50,0xff,0x06,0x06,0x06] 0xf9,0x04,0x0a,0x50,0xff,0x06,0x06,0x06 -# W32: v_add_co_u32_e64 v255, s0, v1, v2 ; encoding: [0xff,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00] -# W64: v_add_co_u32_e64 v255, s[0:1], v1, v2 ; encoding: [0xff,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00] +# W32: v_add_co_u32 v255, s0, v1, v2 ; encoding: [0xff,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00] +# W64: v_add_co_u32 v255, s[0:1], v1, v2 ; encoding: [0xff,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, -1, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xc1,0x04,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], -1, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xc1,0x04,0x02,0x00] +# W32: v_add_co_u32 v5, s0, -1, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xc1,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], -1, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x0f,0xd7,0xc1,0x04,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, -4.0, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xf7,0x04,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], -4.0, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xf7,0x04,0x02,0x00] +# W32: v_add_co_u32 v5, s0, -4.0, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xf7,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], -4.0, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xf7,0x04,0x02,0x00] 0x05,0x00,0x0f,0xd7,0xf7,0x04,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, 0, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x80,0x04,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], 0, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x80,0x04,0x02,0x00] +# W32: v_add_co_u32 v5, s0, 0, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x80,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], 0, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x0f,0xd7,0x80,0x04,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, 0.5, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xf0,0x04,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], 0.5, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xf0,0x04,0x02,0x00] +# W32: v_add_co_u32 v5, s0, 0.5, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xf0,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], 0.5, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xf0,0x04,0x02,0x00] 0x05,0x00,0x0f,0xd7,0xf0,0x04,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, exec_hi, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x7f,0x04,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], exec_hi, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x7f,0x04,0x02,0x00] +# W32: v_add_co_u32 v5, s0, exec_hi, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x7f,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], exec_hi, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x0f,0xd7,0x7f,0x04,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, exec_lo, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x7e,0x04,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], exec_lo, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x7e,0x04,0x02,0x00] +# W32: v_add_co_u32 v5, s0, exec_lo, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x7e,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], exec_lo, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x0f,0xd7,0x7e,0x04,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, m0, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x7c,0x04,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], m0, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x7c,0x04,0x02,0x00] +# W32: v_add_co_u32 v5, s0, m0, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x7c,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], m0, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x0f,0xd7,0x7c,0x04,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, s1, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x04,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], s1, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x04,0x02,0x00] +# W32: v_add_co_u32 v5, s0, s1, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], s1, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x0f,0xd7,0x01,0x04,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, s103, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x67,0x04,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], s103, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x67,0x04,0x02,0x00] +# W32: v_add_co_u32 v5, s0, s103, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x67,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], s103, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x67,0x04,0x02,0x00] 0x05,0x00,0x0f,0xd7,0x67,0x04,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, ttmp11, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x77,0x04,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], ttmp11, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x77,0x04,0x02,0x00] +# W32: v_add_co_u32 v5, s0, ttmp11, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x77,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], ttmp11, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x77,0x04,0x02,0x00] 0x05,0x00,0x0f,0xd7,0x77,0x04,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, -1 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x83,0x01,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, -1 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x83,0x01,0x00] +# W32: v_add_co_u32 v5, s0, v1, -1 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x83,0x01,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, -1 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x0f,0xd7,0x01,0x83,0x01,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, -4.0 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xef,0x01,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, -4.0 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xef,0x01,0x00] +# W32: v_add_co_u32 v5, s0, v1, -4.0 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xef,0x01,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, -4.0 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xef,0x01,0x00] 0x05,0x00,0x0f,0xd7,0x01,0xef,0x01,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, 0 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x01,0x01,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, 0 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x01,0x01,0x00] +# W32: v_add_co_u32 v5, s0, v1, 0 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x01,0x01,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, 0 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x0f,0xd7,0x01,0x01,0x01,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, 0.5 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xe1,0x01,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, 0.5 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xe1,0x01,0x00] +# W32: v_add_co_u32 v5, s0, v1, 0.5 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xe1,0x01,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, 0.5 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xe1,0x01,0x00] 0x05,0x00,0x0f,0xd7,0x01,0xe1,0x01,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, exec_hi ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xff,0x00,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, exec_hi ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xff,0x00,0x00] +# W32: v_add_co_u32 v5, s0, v1, exec_hi ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xff,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, exec_hi ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x0f,0xd7,0x01,0xff,0x00,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, exec_lo ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xfd,0x00,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, exec_lo ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xfd,0x00,0x00] +# W32: v_add_co_u32 v5, s0, v1, exec_lo ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xfd,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, exec_lo ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x0f,0xd7,0x01,0xfd,0x00,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, m0 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xf9,0x00,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, m0 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xf9,0x00,0x00] +# W32: v_add_co_u32 v5, s0, v1, m0 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xf9,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, m0 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x0f,0xd7,0x01,0xf9,0x00,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, s103 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xcf,0x00,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, s103 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xcf,0x00,0x00] +# W32: v_add_co_u32 v5, s0, v1, s103 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xcf,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, s103 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xcf,0x00,0x00] 0x05,0x00,0x0f,0xd7,0x01,0xcf,0x00,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, s2 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x05,0x00,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, s2 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x05,0x00,0x00] +# W32: v_add_co_u32 v5, s0, v1, s2 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x05,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, s2 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x0f,0xd7,0x01,0x05,0x00,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, ttmp11 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xef,0x00,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, ttmp11 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xef,0x00,0x00] +# W32: v_add_co_u32 v5, s0, v1, ttmp11 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xef,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, ttmp11 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xef,0x00,0x00] 0x05,0x00,0x0f,0xd7,0x01,0xef,0x00,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00] +# W32: v_add_co_u32 v5, s0, v1, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, v255 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xff,0x03,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, v255 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xff,0x03,0x00] +# W32: v_add_co_u32 v5, s0, v1, v255 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xff,0x03,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, v255 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x0f,0xd7,0x01,0xff,0x03,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, vcc_hi ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xd7,0x00,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, vcc_hi ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xd7,0x00,0x00] +# W32: v_add_co_u32 v5, s0, v1, vcc_hi ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xd7,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, vcc_hi ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x0f,0xd7,0x01,0xd7,0x00,0x00 -# W32: v_add_co_u32_e64 v5, s0, v1, vcc_lo ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xd5,0x00,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v1, vcc_lo ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xd5,0x00,0x00] +# W32: v_add_co_u32 v5, s0, v1, vcc_lo ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xd5,0x00,0x00] +# W64: v_add_co_u32 v5, s[0:1], v1, vcc_lo ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x0f,0xd7,0x01,0xd5,0x00,0x00 -# W32: v_add_co_u32_e64 v5, s0, v255, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xff,0x05,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], v255, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xff,0x05,0x02,0x00] +# W32: v_add_co_u32 v5, s0, v255, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xff,0x05,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], v255, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x0f,0xd7,0xff,0x05,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, vcc_hi, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x6b,0x04,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], vcc_hi, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x6b,0x04,0x02,0x00] +# W32: v_add_co_u32 v5, s0, vcc_hi, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x6b,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], vcc_hi, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x0f,0xd7,0x6b,0x04,0x02,0x00 -# W32: v_add_co_u32_e64 v5, s0, vcc_lo, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x6a,0x04,0x02,0x00] -# W64: v_add_co_u32_e64 v5, s[0:1], vcc_lo, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x6a,0x04,0x02,0x00] +# W32: v_add_co_u32 v5, s0, vcc_lo, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x6a,0x04,0x02,0x00] +# W64: v_add_co_u32 v5, s[0:1], vcc_lo, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x0f,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_add_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x65,0x01,0xe4,0x00,0x00] @@ -21275,85 +21275,85 @@ # GFX10: v_add_nc_i32 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x7f,0xd7,0x02,0x07,0x02,0x00] 0x01,0x80,0x7f,0xd7,0x02,0x07,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_add_nc_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x03,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_add_nc_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x03,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_add_nc_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x03,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_add_nc_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x03,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_add_nc_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x03,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_add_nc_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x03,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_add_nc_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x03,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_add_nc_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x03,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_add_nc_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x03,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, s101, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x65,0x04,0x02,0x00] +# GFX10: v_add_nc_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x65,0x04,0x02,0x00] 0x05,0x00,0x03,0xd7,0x65,0x04,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_add_nc_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x03,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_add_nc_u16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_add_nc_u16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x03,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_add_nc_u16_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_add_nc_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x03,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_add_nc_u16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_add_nc_u16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x03,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_add_nc_u16_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_add_nc_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x03,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_add_nc_u16_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_add_nc_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x03,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_add_nc_u16_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_add_nc_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x03,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_add_nc_u16_e64 v5, v1, s101 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xcb,0x00,0x00] +# GFX10: v_add_nc_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xcb,0x00,0x00] 0x05,0x00,0x03,0xd7,0x01,0xcb,0x00,0x00 -# GFX10: v_add_nc_u16_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_add_nc_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x03,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_add_nc_u16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_add_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x03,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_add_nc_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x03,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_add_nc_u16_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_add_nc_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x03,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_add_nc_u16_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_add_nc_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x03,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x03,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_add_nc_u16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_add_nc_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x03,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x03,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_add_nc_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x03,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_add_nc_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x03,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x03,0xd7,0x6a,0x04,0x02,0x00 -# GFX10: v_add_nc_u16_e64 v5, vcc_lo, v2 clamp ; encoding: [0x05,0x80,0x03,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_add_nc_u16 v5, vcc_lo, v2 clamp ; encoding: [0x05,0x80,0x03,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x80,0x03,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_add_nc_u32_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x4b] @@ -22178,82 +22178,82 @@ # GFX10: v_and_or_b32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x71,0xd7,0x6a,0x04,0x0e,0x04] 0x05,0x00,0x71,0xd7,0x6a,0x04,0x0e,0x04 -# GFX10: v_ashrrev_i16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x08,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_ashrrev_i16 v255, v1, v2 ; encoding: [0xff,0x00,0x08,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x08,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_ashrrev_i16_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x08,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_ashrrev_i16 v5, -1, v2 ; encoding: [0x05,0x00,0x08,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x08,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_ashrrev_i16_e64 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x08,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_ashrrev_i16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x08,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x08,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_ashrrev_i16_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_ashrrev_i16 v5, 0, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x08,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_ashrrev_i16_e64 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x08,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_ashrrev_i16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x08,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x08,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_ashrrev_i16_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_ashrrev_i16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x08,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_ashrrev_i16_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_ashrrev_i16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x08,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_ashrrev_i16_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_ashrrev_i16 v5, m0, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x08,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_ashrrev_i16_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_ashrrev_i16 v5, s1, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x08,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_ashrrev_i16_e64 v5, s101, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x65,0x04,0x02,0x00] +# GFX10: v_ashrrev_i16 v5, s101, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x65,0x04,0x02,0x00] 0x05,0x00,0x08,0xd7,0x65,0x04,0x02,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_ashrrev_i16 v5, v1, -1 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x08,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_ashrrev_i16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x08,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_ashrrev_i16 v5, v1, 0 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x08,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_ashrrev_i16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x08,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_ashrrev_i16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x08,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_ashrrev_i16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x08,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_ashrrev_i16 v5, v1, m0 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x08,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v1, s101 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xcb,0x00,0x00] +# GFX10: v_ashrrev_i16 v5, v1, s101 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xcb,0x00,0x00] 0x05,0x00,0x08,0xd7,0x01,0xcb,0x00,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_ashrrev_i16 v5, v1, s2 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x08,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_ashrrev_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x08,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_ashrrev_i16 v5, v1, v255 ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x08,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_ashrrev_i16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x08,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_ashrrev_i16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x08,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x08,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_ashrrev_i16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x08,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_ashrrev_i16 v5, v255, v2 ; encoding: [0x05,0x00,0x08,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x08,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_ashrrev_i16_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_ashrrev_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x08,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_ashrrev_i16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_ashrrev_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x08,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x08,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_ashrrev_i32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x31,0x01,0xe4,0x00,0x00] @@ -22631,88 +22631,88 @@ # GFX10: v_ashrrev_i64 v[5:6], vcc_lo, v[2:3] ; encoding: [0x05,0x00,0x01,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x01,0xd7,0x6a,0x04,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v255, v1, v2 ; encoding: [0xff,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x64,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, -1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x64,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, -4.0, v2 ; encoding: [0x05,0x00,0x64,0xd7,0xf7,0x04,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x64,0xd7,0xf7,0x04,0x02,0x00] 0x05,0x00,0x64,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, 0, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x64,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, 0.5, v2 ; encoding: [0x05,0x00,0x64,0xd7,0xf0,0x04,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x64,0xd7,0xf0,0x04,0x02,0x00] 0x05,0x00,0x64,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x64,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x64,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, m0, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x64,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, s1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x64,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, s103, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x67,0x04,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, s103, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x67,0x04,0x02,0x00] 0x05,0x00,0x64,0xd7,0x67,0x04,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x77,0x04,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x77,0x04,0x02,0x00] 0x05,0x00,0x64,0xd7,0x77,0x04,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, -1 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x64,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xef,0x01,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xef,0x01,0x00] 0x05,0x00,0x64,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, 0 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x64,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xe1,0x01,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xe1,0x01,0x00] 0x05,0x00,0x64,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x64,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x64,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x64,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, s103 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xcf,0x00,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, s103 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xcf,0x00,0x00] 0x05,0x00,0x64,0xd7,0x01,0xcf,0x00,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x64,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xef,0x00,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xef,0x00,0x00] 0x05,0x00,0x64,0xd7,0x01,0xef,0x00,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x64,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, v255 ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x64,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x64,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_bcnt_u32_b32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x64,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x64,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, v255, v2 ; encoding: [0x05,0x00,0x64,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x64,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x64,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_bcnt_u32_b32_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_bcnt_u32_b32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x64,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x64,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_bfe_i32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x49,0xd5,0x01,0x05,0x0e,0x04] @@ -23084,88 +23084,88 @@ # GFX10: v_bfi_b32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x4a,0xd5,0x6a,0x04,0x0e,0x04] 0x05,0x00,0x4a,0xd5,0x6a,0x04,0x0e,0x04 -# GFX10: v_bfm_b32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_bfm_b32 v255, v1, v2 ; encoding: [0xff,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x63,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_bfm_b32 v5, -1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x63,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, -4.0, v2 ; encoding: [0x05,0x00,0x63,0xd7,0xf7,0x04,0x02,0x00] +# GFX10: v_bfm_b32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x63,0xd7,0xf7,0x04,0x02,0x00] 0x05,0x00,0x63,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_bfm_b32 v5, 0, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x63,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, 0.5, v2 ; encoding: [0x05,0x00,0x63,0xd7,0xf0,0x04,0x02,0x00] +# GFX10: v_bfm_b32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x63,0xd7,0xf0,0x04,0x02,0x00] 0x05,0x00,0x63,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_bfm_b32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x63,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_bfm_b32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x63,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_bfm_b32 v5, m0, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x63,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_bfm_b32 v5, s1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x63,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, s103, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x67,0x04,0x02,0x00] +# GFX10: v_bfm_b32 v5, s103, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x67,0x04,0x02,0x00] 0x05,0x00,0x63,0xd7,0x67,0x04,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x77,0x04,0x02,0x00] +# GFX10: v_bfm_b32 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x77,0x04,0x02,0x00] 0x05,0x00,0x63,0xd7,0x77,0x04,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_bfm_b32 v5, v1, -1 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x63,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xef,0x01,0x00] +# GFX10: v_bfm_b32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xef,0x01,0x00] 0x05,0x00,0x63,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_bfm_b32 v5, v1, 0 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x63,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xe1,0x01,0x00] +# GFX10: v_bfm_b32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xe1,0x01,0x00] 0x05,0x00,0x63,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_bfm_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x63,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_bfm_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x63,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_bfm_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x63,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, s103 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xcf,0x00,0x00] +# GFX10: v_bfm_b32 v5, v1, s103 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xcf,0x00,0x00] 0x05,0x00,0x63,0xd7,0x01,0xcf,0x00,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_bfm_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x63,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xef,0x00,0x00] +# GFX10: v_bfm_b32 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xef,0x00,0x00] 0x05,0x00,0x63,0xd7,0x01,0xef,0x00,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_bfm_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x63,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_bfm_b32 v5, v1, v255 ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x63,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_bfm_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x63,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_bfm_b32_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_bfm_b32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x63,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x63,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_bfm_b32_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_bfm_b32 v5, v255, v2 ; encoding: [0x05,0x00,0x63,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x63,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_bfm_b32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x63,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_bfm_b32_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_bfm_b32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x63,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x63,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_bfrev_b32_dpp v255, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x70,0xfe,0x7f,0x01,0xe4,0x00,0x00] @@ -69219,172 +69219,172 @@ # GFX10: v_cvt_off_f32_i4_sdwa v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD ; encoding: [0xf9,0x1c,0x0a,0x7e,0x6a,0x06,0x86,0x00] 0xf9,0x1c,0x0a,0x7e,0x6a,0x06,0x86,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x6b,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v255, v1, v2 ; encoding: [0xff,0x00,0x6b,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x6b,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, -1, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x6b,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, -4.0, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0xf7,0x04,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0xf7,0x04,0x02,0x00] 0x05,0x00,0x6b,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, 0, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x6b,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, 0.5, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0xf0,0x04,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0xf0,0x04,0x02,0x00] 0x05,0x00,0x6b,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x6b,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x6b,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, m0, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x6b,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, s1, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x6b,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, s103, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x67,0x04,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, s103, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x67,0x04,0x02,0x00] 0x05,0x00,0x6b,0xd7,0x67,0x04,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x77,0x04,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x77,0x04,0x02,0x00] 0x05,0x00,0x6b,0xd7,0x77,0x04,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, -1 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x6b,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xef,0x01,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xef,0x01,0x00] 0x05,0x00,0x6b,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, 0 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x6b,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xe1,0x01,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xe1,0x01,0x00] 0x05,0x00,0x6b,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x6b,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x6b,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, m0 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x6b,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, s103 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xcf,0x00,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, s103 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xcf,0x00,0x00] 0x05,0x00,0x6b,0xd7,0x01,0xcf,0x00,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, s2 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x6b,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xef,0x00,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xef,0x00,0x00] 0x05,0x00,0x6b,0xd7,0x01,0xef,0x00,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x6b,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, v255 ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x6b,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x6b,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x6b,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x6b,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, v255, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x6b,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x6b,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_cvt_pk_i16_i32_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_cvt_pk_i16_i32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x6b,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x6b,0xd7,0x6a,0x04,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x6a,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v255, v1, v2 ; encoding: [0xff,0x00,0x6a,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x6a,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, -1, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x6a,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, -4.0, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0xf7,0x04,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0xf7,0x04,0x02,0x00] 0x05,0x00,0x6a,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, 0, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x6a,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, 0.5, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0xf0,0x04,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0xf0,0x04,0x02,0x00] 0x05,0x00,0x6a,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x6a,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x6a,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, m0, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x6a,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, s1, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x6a,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, s103, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x67,0x04,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, s103, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x67,0x04,0x02,0x00] 0x05,0x00,0x6a,0xd7,0x67,0x04,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x77,0x04,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x77,0x04,0x02,0x00] 0x05,0x00,0x6a,0xd7,0x77,0x04,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, -1 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x6a,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xef,0x01,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xef,0x01,0x00] 0x05,0x00,0x6a,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, 0 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x6a,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xe1,0x01,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xe1,0x01,0x00] 0x05,0x00,0x6a,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x6a,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x6a,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, m0 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x6a,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, s103 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xcf,0x00,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, s103 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xcf,0x00,0x00] 0x05,0x00,0x6a,0xd7,0x01,0xcf,0x00,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, s2 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x6a,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xef,0x00,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xef,0x00,0x00] 0x05,0x00,0x6a,0xd7,0x01,0xef,0x00,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x6a,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, v255 ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x6a,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x6a,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x6a,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x6a,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, v255, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x6a,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x6a,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_cvt_pk_u16_u32_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_cvt_pk_u16_u32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x6a,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x6a,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_cvt_pk_u8_f32 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x5e,0xd5,0x01,0x05,0x0e,0x04] @@ -69627,109 +69627,109 @@ # GFX10: v_cvt_pknorm_i16_f16 v5, |v1|, |v2| ; encoding: [0x05,0x03,0x12,0xd7,0x01,0x05,0x02,0x00] 0x05,0x03,0x12,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x68,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v255, v1, v2 ; encoding: [0xff,0x00,0x68,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x68,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x68,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, -1, v2 ; encoding: [0x05,0x00,0x68,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x68,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, -4.0, v2 ; encoding: [0x05,0x00,0x68,0xd7,0xf7,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x68,0xd7,0xf7,0x04,0x02,0x00] 0x05,0x00,0x68,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, -v1, -v2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x60] +# GFX10: v_cvt_pknorm_i16_f32 v5, -v1, -v2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x60] 0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x60 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, -v1, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x20] +# GFX10: v_cvt_pknorm_i16_f32 v5, -v1, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x20] 0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x20 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, 0, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x68,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, 0.5, v2 ; encoding: [0x05,0x00,0x68,0xd7,0xf0,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x68,0xd7,0xf0,0x04,0x02,0x00] 0x05,0x00,0x68,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x68,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x68,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, m0, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x68,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, s1, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x68,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, s103, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x67,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, s103, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x67,0x04,0x02,0x00] 0x05,0x00,0x68,0xd7,0x67,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x77,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x77,0x04,0x02,0x00] 0x05,0x00,0x68,0xd7,0x77,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, -1 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x68,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xef,0x01,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xef,0x01,0x00] 0x05,0x00,0x68,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, -v2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x40] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, -v2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x40] 0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x40 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, 0 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x68,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xe1,0x01,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xe1,0x01,0x00] 0x05,0x00,0x68,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x68,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x68,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, m0 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x68,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, s103 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xcf,0x00,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, s103 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xcf,0x00,0x00] 0x05,0x00,0x68,0xd7,0x01,0xcf,0x00,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, s2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x68,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xef,0x00,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xef,0x00,0x00] 0x05,0x00,0x68,0xd7,0x01,0xef,0x00,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x68,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x68,0xd7,0x01,0x05,0x02,0x00] 0x05,0x80,0x68,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, v255 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x68,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x68,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x68,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x68,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v1, |v2| ; encoding: [0x05,0x02,0x68,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v1, |v2| ; encoding: [0x05,0x02,0x68,0xd7,0x01,0x05,0x02,0x00] 0x05,0x02,0x68,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x68,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, v255, v2 ; encoding: [0x05,0x00,0x68,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x68,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x68,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x68,0xd7,0x6a,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, |v1|, v2 ; encoding: [0x05,0x01,0x68,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, |v1|, v2 ; encoding: [0x05,0x01,0x68,0xd7,0x01,0x05,0x02,0x00] 0x05,0x01,0x68,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_i16_f32_e64 v5, |v1|, |v2| ; encoding: [0x05,0x03,0x68,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_i16_f32 v5, |v1|, |v2| ; encoding: [0x05,0x03,0x68,0xd7,0x01,0x05,0x02,0x00] 0x05,0x03,0x68,0xd7,0x01,0x05,0x02,0x00 # GFX10: v_cvt_pknorm_u16_f16 v255, v1, v2 ; encoding: [0xff,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] @@ -69840,109 +69840,109 @@ # GFX10: v_cvt_pknorm_u16_f16 v5, |v1|, |v2| ; encoding: [0x05,0x03,0x13,0xd7,0x01,0x05,0x02,0x00] 0x05,0x03,0x13,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x69,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v255, v1, v2 ; encoding: [0xff,0x00,0x69,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x69,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x69,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, -1, v2 ; encoding: [0x05,0x00,0x69,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x69,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, -4.0, v2 ; encoding: [0x05,0x00,0x69,0xd7,0xf7,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x69,0xd7,0xf7,0x04,0x02,0x00] 0x05,0x00,0x69,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, -v1, -v2 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x05,0x02,0x60] +# GFX10: v_cvt_pknorm_u16_f32 v5, -v1, -v2 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x05,0x02,0x60] 0x05,0x00,0x69,0xd7,0x01,0x05,0x02,0x60 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, -v1, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x05,0x02,0x20] +# GFX10: v_cvt_pknorm_u16_f32 v5, -v1, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x05,0x02,0x20] 0x05,0x00,0x69,0xd7,0x01,0x05,0x02,0x20 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, 0, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x69,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, 0.5, v2 ; encoding: [0x05,0x00,0x69,0xd7,0xf0,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x69,0xd7,0xf0,0x04,0x02,0x00] 0x05,0x00,0x69,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x69,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x69,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, m0, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x69,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, s1, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x69,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, s103, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x67,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, s103, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x67,0x04,0x02,0x00] 0x05,0x00,0x69,0xd7,0x67,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x77,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x77,0x04,0x02,0x00] 0x05,0x00,0x69,0xd7,0x77,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, -1 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x69,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xef,0x01,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xef,0x01,0x00] 0x05,0x00,0x69,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, -v2 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x05,0x02,0x40] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, -v2 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x05,0x02,0x40] 0x05,0x00,0x69,0xd7,0x01,0x05,0x02,0x40 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, 0 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x69,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xe1,0x01,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xe1,0x01,0x00] 0x05,0x00,0x69,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x69,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x69,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, m0 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x69,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, s103 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xcf,0x00,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, s103 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xcf,0x00,0x00] 0x05,0x00,0x69,0xd7,0x01,0xcf,0x00,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, s2 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x69,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xef,0x00,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xef,0x00,0x00] 0x05,0x00,0x69,0xd7,0x01,0xef,0x00,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x69,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x69,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x69,0xd7,0x01,0x05,0x02,0x00] 0x05,0x80,0x69,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, v255 ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x69,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x69,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x69,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x69,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v1, |v2| ; encoding: [0x05,0x02,0x69,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v1, |v2| ; encoding: [0x05,0x02,0x69,0xd7,0x01,0x05,0x02,0x00] 0x05,0x02,0x69,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x69,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, v255, v2 ; encoding: [0x05,0x00,0x69,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x69,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x69,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x69,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x69,0xd7,0x6a,0x04,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, |v1|, v2 ; encoding: [0x05,0x01,0x69,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, |v1|, v2 ; encoding: [0x05,0x01,0x69,0xd7,0x01,0x05,0x02,0x00] 0x05,0x01,0x69,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_cvt_pknorm_u16_f32_e64 v5, |v1|, |v2| ; encoding: [0x05,0x03,0x69,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_cvt_pknorm_u16_f32 v5, |v1|, |v2| ; encoding: [0x05,0x03,0x69,0xd7,0x01,0x05,0x02,0x00] 0x05,0x03,0x69,0xd7,0x01,0x05,0x02,0x00 # GFX10: v_cvt_pkrtz_f16_f32_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x5f] @@ -76412,106 +76412,106 @@ # GFX10: v_ldexp_f16_sdwa v5, |v1|, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x76,0x01,0x06,0x26,0x06] 0xf9,0x04,0x0a,0x76,0x01,0x06,0x26,0x06 -# GFX10: v_ldexp_f32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_ldexp_f32 v255, v1, v2 ; encoding: [0xff,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x62,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_ldexp_f32 v5, -1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x62,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, -4.0, v2 ; encoding: [0x05,0x00,0x62,0xd7,0xf7,0x04,0x02,0x00] +# GFX10: v_ldexp_f32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x62,0xd7,0xf7,0x04,0x02,0x00] 0x05,0x00,0x62,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, -v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x20] +# GFX10: v_ldexp_f32 v5, -v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x20] 0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x20 -# GFX10: v_ldexp_f32_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_ldexp_f32 v5, 0, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x62,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, 0.5, v2 ; encoding: [0x05,0x00,0x62,0xd7,0xf0,0x04,0x02,0x00] +# GFX10: v_ldexp_f32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x62,0xd7,0xf0,0x04,0x02,0x00] 0x05,0x00,0x62,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_ldexp_f32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x62,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_ldexp_f32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x62,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_ldexp_f32 v5, m0, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x62,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_ldexp_f32 v5, s1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x62,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, s103, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x67,0x04,0x02,0x00] +# GFX10: v_ldexp_f32 v5, s103, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x67,0x04,0x02,0x00] 0x05,0x00,0x62,0xd7,0x67,0x04,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x77,0x04,0x02,0x00] +# GFX10: v_ldexp_f32 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x77,0x04,0x02,0x00] 0x05,0x00,0x62,0xd7,0x77,0x04,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_ldexp_f32 v5, v1, -1 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x62,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xef,0x01,0x00] +# GFX10: v_ldexp_f32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xef,0x01,0x00] 0x05,0x00,0x62,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_ldexp_f32 v5, v1, 0 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x62,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xe1,0x01,0x00] +# GFX10: v_ldexp_f32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xe1,0x01,0x00] 0x05,0x00,0x62,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_ldexp_f32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x62,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_ldexp_f32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x62,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_ldexp_f32 v5, v1, m0 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x62,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, s103 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xcf,0x00,0x00] +# GFX10: v_ldexp_f32 v5, v1, s103 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xcf,0x00,0x00] 0x05,0x00,0x62,0xd7,0x01,0xcf,0x00,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_ldexp_f32 v5, v1, s2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x62,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xef,0x00,0x00] +# GFX10: v_ldexp_f32 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xef,0x00,0x00] 0x05,0x00,0x62,0xd7,0x01,0xef,0x00,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_ldexp_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x62,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_ldexp_f32 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x62,0xd7,0x01,0x05,0x02,0x00] 0x05,0x80,0x62,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, v2 div:2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x18] +# GFX10: v_ldexp_f32 v5, v1, v2 div:2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x18] 0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x18 -# GFX10: v_ldexp_f32_e64 v5, v1, v2 mul:2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x08] +# GFX10: v_ldexp_f32 v5, v1, v2 mul:2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x08] 0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x08 -# GFX10: v_ldexp_f32_e64 v5, v1, v2 mul:4 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x10] +# GFX10: v_ldexp_f32 v5, v1, v2 mul:4 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x10] 0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x10 -# GFX10: v_ldexp_f32_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_ldexp_f32 v5, v1, v255 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x62,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_ldexp_f32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x62,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_ldexp_f32_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_ldexp_f32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x62,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x62,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_ldexp_f32_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_ldexp_f32 v5, v255, v2 ; encoding: [0x05,0x00,0x62,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x62,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_ldexp_f32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x62,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_ldexp_f32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x62,0xd7,0x6a,0x04,0x02,0x00 -# GFX10: v_ldexp_f32_e64 v5, |v1|, v2 ; encoding: [0x05,0x01,0x62,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_ldexp_f32 v5, |v1|, v2 ; encoding: [0x05,0x01,0x62,0xd7,0x01,0x05,0x02,0x00] 0x05,0x01,0x62,0xd7,0x01,0x05,0x02,0x00 # GFX10: v_ldexp_f64 v[254:255], v[1:2], v2 ; encoding: [0xfe,0x00,0x68,0xd5,0x01,0x05,0x02,0x00] @@ -77480,82 +77480,82 @@ # GFX10: v_lshl_or_b32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x6f,0xd7,0x6a,0x04,0x0e,0x04] 0x05,0x00,0x6f,0xd7,0x6a,0x04,0x0e,0x04 -# GFX10: v_lshlrev_b16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x14,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_lshlrev_b16 v255, v1, v2 ; encoding: [0xff,0x00,0x14,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x14,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_lshlrev_b16_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x14,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_lshlrev_b16 v5, -1, v2 ; encoding: [0x05,0x00,0x14,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x14,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_lshlrev_b16_e64 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x14,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_lshlrev_b16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x14,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x14,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_lshlrev_b16_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_lshlrev_b16 v5, 0, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x14,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_lshlrev_b16_e64 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x14,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_lshlrev_b16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x14,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x14,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_lshlrev_b16_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_lshlrev_b16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x14,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_lshlrev_b16_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_lshlrev_b16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x14,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_lshlrev_b16_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_lshlrev_b16 v5, m0, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x14,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_lshlrev_b16_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_lshlrev_b16 v5, s1, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x14,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_lshlrev_b16_e64 v5, s101, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x65,0x04,0x02,0x00] +# GFX10: v_lshlrev_b16 v5, s101, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x65,0x04,0x02,0x00] 0x05,0x00,0x14,0xd7,0x65,0x04,0x02,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_lshlrev_b16 v5, v1, -1 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x14,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_lshlrev_b16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x14,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_lshlrev_b16 v5, v1, 0 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x14,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_lshlrev_b16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x14,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_lshlrev_b16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x14,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_lshlrev_b16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x14,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_lshlrev_b16 v5, v1, m0 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x14,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v1, s101 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xcb,0x00,0x00] +# GFX10: v_lshlrev_b16 v5, v1, s101 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xcb,0x00,0x00] 0x05,0x00,0x14,0xd7,0x01,0xcb,0x00,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_lshlrev_b16 v5, v1, s2 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x14,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_lshlrev_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x14,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_lshlrev_b16 v5, v1, v255 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x14,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_lshlrev_b16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x14,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_lshlrev_b16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x14,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_lshlrev_b16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x14,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_lshlrev_b16 v5, v255, v2 ; encoding: [0x05,0x00,0x14,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x14,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_lshlrev_b16_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_lshlrev_b16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x14,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_lshlrev_b16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_lshlrev_b16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x14,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x14,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_lshlrev_b32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x35,0x01,0xe4,0x00,0x00] @@ -77933,82 +77933,82 @@ # GFX10: v_lshlrev_b64 v[5:6], vcc_lo, v[2:3] ; encoding: [0x05,0x00,0xff,0xd6,0x6a,0x04,0x02,0x00] 0x05,0x00,0xff,0xd6,0x6a,0x04,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_lshrrev_b16 v255, v1, v2 ; encoding: [0xff,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x07,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_lshrrev_b16 v5, -1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x07,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x07,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_lshrrev_b16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x07,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x07,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_lshrrev_b16 v5, 0, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x07,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x07,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_lshrrev_b16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x07,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x07,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_lshrrev_b16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x07,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_lshrrev_b16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x07,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_lshrrev_b16 v5, m0, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x07,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_lshrrev_b16 v5, s1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x07,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v5, s101, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x65,0x04,0x02,0x00] +# GFX10: v_lshrrev_b16 v5, s101, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x65,0x04,0x02,0x00] 0x05,0x00,0x07,0xd7,0x65,0x04,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_lshrrev_b16 v5, v1, -1 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x07,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_lshrrev_b16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x07,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_lshrrev_b16 v5, v1, 0 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x07,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_lshrrev_b16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x07,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_lshrrev_b16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x07,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_lshrrev_b16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x07,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_lshrrev_b16 v5, v1, m0 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x07,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v1, s101 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xcb,0x00,0x00] +# GFX10: v_lshrrev_b16 v5, v1, s101 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xcb,0x00,0x00] 0x05,0x00,0x07,0xd7,0x01,0xcb,0x00,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_lshrrev_b16 v5, v1, s2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x07,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_lshrrev_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_lshrrev_b16 v5, v1, v255 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x07,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_lshrrev_b16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x07,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_lshrrev_b16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x07,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x07,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_lshrrev_b16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x07,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_lshrrev_b16 v5, v255, v2 ; encoding: [0x05,0x00,0x07,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x07,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_lshrrev_b16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x07,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_lshrrev_b16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_lshrrev_b16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x07,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_lshrrev_b32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x2d,0x01,0xe4,0x00,0x00] @@ -81567,82 +81567,82 @@ # GFX10: v_max_f64 v[5:6], |v[1:2]|, |v[2:3]| ; encoding: [0x05,0x03,0x67,0xd5,0x01,0x05,0x02,0x00] 0x05,0x03,0x67,0xd5,0x01,0x05,0x02,0x00 -# GFX10: v_max_i16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_max_i16 v255, v1, v2 ; encoding: [0xff,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_max_i16_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_max_i16 v5, -1, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x0a,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_max_i16_e64 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_max_i16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x0a,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_max_i16_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_max_i16 v5, 0, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x0a,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_max_i16_e64 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_max_i16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x0a,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_max_i16_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_max_i16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x0a,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_max_i16_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_max_i16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x0a,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_max_i16_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_max_i16 v5, m0, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x0a,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_max_i16_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_max_i16 v5, s1, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x0a,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_max_i16_e64 v5, s101, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x65,0x04,0x02,0x00] +# GFX10: v_max_i16 v5, s101, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x65,0x04,0x02,0x00] 0x05,0x00,0x0a,0xd7,0x65,0x04,0x02,0x00 -# GFX10: v_max_i16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_max_i16 v5, v1, -1 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x0a,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_max_i16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_max_i16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x0a,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_max_i16_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_max_i16 v5, v1, 0 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x0a,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_max_i16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_max_i16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x0a,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_max_i16_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_max_i16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x0a,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_max_i16_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_max_i16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x0a,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_max_i16_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_max_i16 v5, v1, m0 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x0a,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_max_i16_e64 v5, v1, s101 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xcb,0x00,0x00] +# GFX10: v_max_i16 v5, v1, s101 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xcb,0x00,0x00] 0x05,0x00,0x0a,0xd7,0x01,0xcb,0x00,0x00 -# GFX10: v_max_i16_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_max_i16 v5, v1, s2 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x0a,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_max_i16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_max_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x0a,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_max_i16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_max_i16 v5, v1, v255 ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x0a,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_max_i16_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_max_i16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x0a,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_max_i16_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_max_i16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x0a,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x0a,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_max_i16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_max_i16 v5, v255, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x0a,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_max_i16_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_max_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x0a,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_max_i16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_max_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0a,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x0a,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_max_i32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x25,0x01,0xe4,0x00,0x00] @@ -81951,82 +81951,82 @@ # GFX10: v_max_i32_sdwa v5, vcc_lo, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x24,0x6a,0x06,0x86,0x06] 0xf9,0x04,0x0a,0x24,0x6a,0x06,0x86,0x06 -# GFX10: v_max_u16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_max_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x09,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_max_u16_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x09,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_max_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x09,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x09,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_max_u16_e64 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x09,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_max_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x09,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x09,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_max_u16_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_max_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x09,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_max_u16_e64 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x09,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_max_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x09,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x09,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_max_u16_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_max_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x09,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_max_u16_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_max_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x09,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_max_u16_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_max_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x09,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_max_u16_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_max_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x09,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_max_u16_e64 v5, s101, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x65,0x04,0x02,0x00] +# GFX10: v_max_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x65,0x04,0x02,0x00] 0x05,0x00,0x09,0xd7,0x65,0x04,0x02,0x00 -# GFX10: v_max_u16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_max_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x09,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_max_u16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_max_u16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x09,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_max_u16_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_max_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x09,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_max_u16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_max_u16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x09,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_max_u16_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_max_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x09,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_max_u16_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_max_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x09,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_max_u16_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_max_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x09,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_max_u16_e64 v5, v1, s101 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xcb,0x00,0x00] +# GFX10: v_max_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xcb,0x00,0x00] 0x05,0x00,0x09,0xd7,0x01,0xcb,0x00,0x00 -# GFX10: v_max_u16_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_max_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x09,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_max_u16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_max_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_max_u16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_max_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x09,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_max_u16_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_max_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x09,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_max_u16_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_max_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x09,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x09,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_max_u16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x09,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_max_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x09,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x09,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_max_u16_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_max_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x09,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_max_u16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_max_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x09,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_max_u32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x29,0x01,0xe4,0x00,0x00] @@ -82335,172 +82335,172 @@ # GFX10: v_max_u32_sdwa v5, vcc_lo, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x28,0x6a,0x06,0x86,0x06] 0xf9,0x04,0x0a,0x28,0x6a,0x06,0x86,0x06 -# GFX10: v_mbcnt_hi_u32_b32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x66,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v255, v1, v2 ; encoding: [0xff,0x00,0x66,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x66,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x66,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, -1, v2 ; encoding: [0x05,0x00,0x66,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x66,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, -4.0, v2 ; encoding: [0x05,0x00,0x66,0xd7,0xf7,0x04,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x66,0xd7,0xf7,0x04,0x02,0x00] 0x05,0x00,0x66,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, 0, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x66,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, 0.5, v2 ; encoding: [0x05,0x00,0x66,0xd7,0xf0,0x04,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x66,0xd7,0xf0,0x04,0x02,0x00] 0x05,0x00,0x66,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x66,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x66,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, m0, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x66,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, s1, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x66,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, s103, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x67,0x04,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, s103, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x67,0x04,0x02,0x00] 0x05,0x00,0x66,0xd7,0x67,0x04,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x77,0x04,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x77,0x04,0x02,0x00] 0x05,0x00,0x66,0xd7,0x77,0x04,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, -1 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x66,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xef,0x01,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xef,0x01,0x00] 0x05,0x00,0x66,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, 0 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x66,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xe1,0x01,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xe1,0x01,0x00] 0x05,0x00,0x66,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x66,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x66,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x66,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, s103 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xcf,0x00,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, s103 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xcf,0x00,0x00] 0x05,0x00,0x66,0xd7,0x01,0xcf,0x00,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x66,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xef,0x00,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xef,0x00,0x00] 0x05,0x00,0x66,0xd7,0x01,0xef,0x00,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x66,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, v255 ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x66,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x66,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x66,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x66,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x66,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, v255, v2 ; encoding: [0x05,0x00,0x66,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x66,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x66,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_mbcnt_hi_u32_b32_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_mbcnt_hi_u32_b32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x66,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x66,0xd7,0x6a,0x04,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x65,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v255, v1, v2 ; encoding: [0xff,0x00,0x65,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x65,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x65,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, -1, v2 ; encoding: [0x05,0x00,0x65,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x65,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, -4.0, v2 ; encoding: [0x05,0x00,0x65,0xd7,0xf7,0x04,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, -4.0, v2 ; encoding: [0x05,0x00,0x65,0xd7,0xf7,0x04,0x02,0x00] 0x05,0x00,0x65,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, 0, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x65,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, 0.5, v2 ; encoding: [0x05,0x00,0x65,0xd7,0xf0,0x04,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, 0.5, v2 ; encoding: [0x05,0x00,0x65,0xd7,0xf0,0x04,0x02,0x00] 0x05,0x00,0x65,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x65,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x65,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, m0, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x65,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, s1, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x65,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, s103, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x67,0x04,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, s103, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x67,0x04,0x02,0x00] 0x05,0x00,0x65,0xd7,0x67,0x04,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x77,0x04,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, ttmp11, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x77,0x04,0x02,0x00] 0x05,0x00,0x65,0xd7,0x77,0x04,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, -1 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x65,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xef,0x01,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, -4.0 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xef,0x01,0x00] 0x05,0x00,0x65,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, 0 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x65,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xe1,0x01,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, 0.5 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xe1,0x01,0x00] 0x05,0x00,0x65,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x65,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x65,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x65,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, s103 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xcf,0x00,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, s103 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xcf,0x00,0x00] 0x05,0x00,0x65,0xd7,0x01,0xcf,0x00,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x65,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xef,0x00,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, ttmp11 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xef,0x00,0x00] 0x05,0x00,0x65,0xd7,0x01,0xef,0x00,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x65,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, v255 ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x65,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x65,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x65,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x65,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x65,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, v255, v2 ; encoding: [0x05,0x00,0x65,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x65,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x65,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_mbcnt_lo_u32_b32_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_mbcnt_lo_u32_b32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x65,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x65,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_med3_f16 v255, v1, v2, v3 ; encoding: [0xff,0x00,0x57,0xd7,0x01,0x05,0x0e,0x04] @@ -84951,82 +84951,82 @@ # GFX10: v_min_f64 v[5:6], |v[1:2]|, |v[2:3]| ; encoding: [0x05,0x03,0x66,0xd5,0x01,0x05,0x02,0x00] 0x05,0x03,0x66,0xd5,0x01,0x05,0x02,0x00 -# GFX10: v_min_i16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_min_i16 v255, v1, v2 ; encoding: [0xff,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_min_i16_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_min_i16 v5, -1, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x0c,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_min_i16_e64 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_min_i16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x0c,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_min_i16_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_min_i16 v5, 0, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x0c,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_min_i16_e64 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_min_i16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x0c,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_min_i16_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_min_i16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x0c,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_min_i16_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_min_i16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x0c,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_min_i16_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_min_i16 v5, m0, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x0c,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_min_i16_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_min_i16 v5, s1, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x0c,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_min_i16_e64 v5, s101, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x65,0x04,0x02,0x00] +# GFX10: v_min_i16 v5, s101, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x65,0x04,0x02,0x00] 0x05,0x00,0x0c,0xd7,0x65,0x04,0x02,0x00 -# GFX10: v_min_i16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_min_i16 v5, v1, -1 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x0c,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_min_i16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_min_i16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x0c,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_min_i16_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_min_i16 v5, v1, 0 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x0c,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_min_i16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_min_i16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x0c,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_min_i16_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_min_i16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x0c,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_min_i16_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_min_i16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x0c,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_min_i16_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_min_i16 v5, v1, m0 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x0c,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_min_i16_e64 v5, v1, s101 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xcb,0x00,0x00] +# GFX10: v_min_i16 v5, v1, s101 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xcb,0x00,0x00] 0x05,0x00,0x0c,0xd7,0x01,0xcb,0x00,0x00 -# GFX10: v_min_i16_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_min_i16 v5, v1, s2 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x0c,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_min_i16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_min_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x0c,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_min_i16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_min_i16 v5, v1, v255 ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x0c,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_min_i16_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_min_i16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x0c,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_min_i16_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_min_i16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x0c,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x0c,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_min_i16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_min_i16 v5, v255, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x0c,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_min_i16_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_min_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x0c,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_min_i16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_min_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0c,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x0c,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_min_i32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x23,0x01,0xe4,0x00,0x00] @@ -85335,82 +85335,82 @@ # GFX10: v_min_i32_sdwa v5, vcc_lo, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x22,0x6a,0x06,0x86,0x06] 0xf9,0x04,0x0a,0x22,0x6a,0x06,0x86,0x06 -# GFX10: v_min_u16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_min_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_min_u16_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_min_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x0b,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_min_u16_e64 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_min_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x0b,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_min_u16_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_min_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x0b,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_min_u16_e64 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_min_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x0b,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_min_u16_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_min_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x0b,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_min_u16_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_min_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x0b,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_min_u16_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_min_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x0b,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_min_u16_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_min_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x0b,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_min_u16_e64 v5, s101, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x65,0x04,0x02,0x00] +# GFX10: v_min_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x65,0x04,0x02,0x00] 0x05,0x00,0x0b,0xd7,0x65,0x04,0x02,0x00 -# GFX10: v_min_u16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_min_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x0b,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_min_u16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_min_u16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x0b,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_min_u16_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_min_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x0b,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_min_u16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_min_u16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x0b,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_min_u16_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_min_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x0b,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_min_u16_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_min_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x0b,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_min_u16_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_min_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x0b,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_min_u16_e64 v5, v1, s101 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xcb,0x00,0x00] +# GFX10: v_min_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xcb,0x00,0x00] 0x05,0x00,0x0b,0xd7,0x01,0xcb,0x00,0x00 -# GFX10: v_min_u16_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_min_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x0b,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_min_u16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_min_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_min_u16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_min_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x0b,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_min_u16_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_min_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x0b,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_min_u16_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_min_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x0b,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_min_u16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_min_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x0b,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_min_u16_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_min_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x0b,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_min_u16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_min_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x0b,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_min_u32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x27,0x01,0xe4,0x00,0x00] @@ -88617,82 +88617,82 @@ # GFX10: v_mul_legacy_f32_sdwa v5, |v1|, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x0e,0x01,0x06,0x26,0x06] 0xf9,0x04,0x0a,0x0e,0x01,0x06,0x26,0x06 -# GFX10: v_mul_lo_u16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_mul_lo_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x05,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_mul_lo_u16_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x05,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_mul_lo_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x05,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x05,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_mul_lo_u16_e64 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_mul_lo_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x05,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_mul_lo_u16_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_mul_lo_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x05,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_mul_lo_u16_e64 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_mul_lo_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x05,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_mul_lo_u16_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_mul_lo_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x05,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_mul_lo_u16_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_mul_lo_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x05,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_mul_lo_u16_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_mul_lo_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x05,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_mul_lo_u16_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_mul_lo_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x05,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_mul_lo_u16_e64 v5, s101, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x65,0x04,0x02,0x00] +# GFX10: v_mul_lo_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x65,0x04,0x02,0x00] 0x05,0x00,0x05,0xd7,0x65,0x04,0x02,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_mul_lo_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x05,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_mul_lo_u16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x05,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_mul_lo_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x05,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_mul_lo_u16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x05,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_mul_lo_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x05,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_mul_lo_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x05,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_mul_lo_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x05,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v1, s101 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xcb,0x00,0x00] +# GFX10: v_mul_lo_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xcb,0x00,0x00] 0x05,0x00,0x05,0xd7,0x01,0xcb,0x00,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_mul_lo_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x05,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_mul_lo_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_mul_lo_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x05,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_mul_lo_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x05,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_mul_lo_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x05,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x05,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_mul_lo_u16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x05,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_mul_lo_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x05,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x05,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_mul_lo_u16_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_mul_lo_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x05,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_mul_lo_u16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_mul_lo_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x05,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_mul_lo_u32 v255, v1, v2 ; encoding: [0xff,0x00,0x69,0xd5,0x01,0x05,0x02,0x00] @@ -90339,52 +90339,52 @@ # GFX10: v_pipeflush ; encoding: [0x00,0x36,0x00,0x7e] 0x00,0x36,0x00,0x7e -# GFX10: v_pk_fmac_f16_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x79] +# GFX10: v_pk_fmac_f16 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x79] 0x01,0x05,0xfe,0x79 -# GFX10: v_pk_fmac_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] 0xc1,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, -4.0, v2 ; encoding: [0xf7,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, -4.0, v2 ; encoding: [0xf7,0x04,0x0a,0x78] 0xf7,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, 0, v2 ; encoding: [0x80,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, 0, v2 ; encoding: [0x80,0x04,0x0a,0x78] 0x80,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] 0xf0,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] 0x7f,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] 0x7e,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, m0, v2 ; encoding: [0x7c,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, m0, v2 ; encoding: [0x7c,0x04,0x0a,0x78] 0x7c,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78] 0x01,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, s103, v2 ; encoding: [0x67,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, s103, v2 ; encoding: [0x67,0x04,0x0a,0x78] 0x67,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, ttmp11, v2 ; encoding: [0x77,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, ttmp11, v2 ; encoding: [0x77,0x04,0x0a,0x78] 0x77,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] 0x01,0x05,0x0a,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, v1, v255 ; encoding: [0x01,0xff,0x0b,0x78] +# GFX10: v_pk_fmac_f16 v5, v1, v255 ; encoding: [0x01,0xff,0x0b,0x78] 0x01,0xff,0x0b,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] 0xff,0x05,0x0a,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] 0x6b,0x04,0x0a,0x78 -# GFX10: v_pk_fmac_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] +# GFX10: v_pk_fmac_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] 0x6a,0x04,0x0a,0x78 # GFX10: v_qsad_pk_u16_u8 v[254:255], v[1:2], v2, v[3:4] ; encoding: [0xfe,0x00,0x72,0xd5,0x01,0x05,0x0e,0x04] @@ -94475,116 +94475,116 @@ # W64: v_sub_co_ci_u32_sdwa v5, vcc, v255, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x52,0xff,0x06,0x06,0x06] 0xf9,0x04,0x0a,0x52,0xff,0x06,0x06,0x06 -# W32: v_sub_co_u32_e64 v255, s0, v1, v2 ; encoding: [0xff,0x00,0x10,0xd7,0x01,0x05,0x02,0x00] -# W64: v_sub_co_u32_e64 v255, s[0:1], v1, v2 ; encoding: [0xff,0x00,0x10,0xd7,0x01,0x05,0x02,0x00] +# W32: v_sub_co_u32 v255, s0, v1, v2 ; encoding: [0xff,0x00,0x10,0xd7,0x01,0x05,0x02,0x00] +# W64: v_sub_co_u32 v255, s[0:1], v1, v2 ; encoding: [0xff,0x00,0x10,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x10,0xd7,0x01,0x05,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, -1, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xc1,0x04,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], -1, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xc1,0x04,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, -1, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xc1,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], -1, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x10,0xd7,0xc1,0x04,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, -4.0, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xf7,0x04,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], -4.0, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xf7,0x04,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, -4.0, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xf7,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], -4.0, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xf7,0x04,0x02,0x00] 0x05,0x00,0x10,0xd7,0xf7,0x04,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, 0, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x80,0x04,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], 0, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x80,0x04,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, 0, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x80,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], 0, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x10,0xd7,0x80,0x04,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, 0.5, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xf0,0x04,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], 0.5, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xf0,0x04,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, 0.5, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xf0,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], 0.5, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xf0,0x04,0x02,0x00] 0x05,0x00,0x10,0xd7,0xf0,0x04,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, exec_hi, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x7f,0x04,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], exec_hi, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x7f,0x04,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, exec_hi, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x7f,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], exec_hi, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x10,0xd7,0x7f,0x04,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, exec_lo, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x7e,0x04,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], exec_lo, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x7e,0x04,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, exec_lo, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x7e,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], exec_lo, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x10,0xd7,0x7e,0x04,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, m0, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x7c,0x04,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], m0, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x7c,0x04,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, m0, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x7c,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], m0, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x10,0xd7,0x7c,0x04,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, s1, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x04,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], s1, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x04,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, s1, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], s1, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x10,0xd7,0x01,0x04,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, s103, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x67,0x04,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], s103, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x67,0x04,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, s103, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x67,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], s103, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x67,0x04,0x02,0x00] 0x05,0x00,0x10,0xd7,0x67,0x04,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, ttmp11, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x77,0x04,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], ttmp11, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x77,0x04,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, ttmp11, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x77,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], ttmp11, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x77,0x04,0x02,0x00] 0x05,0x00,0x10,0xd7,0x77,0x04,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, -1 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x83,0x01,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, -1 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x83,0x01,0x00] +# W32: v_sub_co_u32 v5, s0, v1, -1 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x83,0x01,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, -1 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x10,0xd7,0x01,0x83,0x01,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, -4.0 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xef,0x01,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, -4.0 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xef,0x01,0x00] +# W32: v_sub_co_u32 v5, s0, v1, -4.0 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xef,0x01,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, -4.0 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xef,0x01,0x00] 0x05,0x00,0x10,0xd7,0x01,0xef,0x01,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, 0 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x01,0x01,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, 0 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x01,0x01,0x00] +# W32: v_sub_co_u32 v5, s0, v1, 0 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x01,0x01,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, 0 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x10,0xd7,0x01,0x01,0x01,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, 0.5 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xe1,0x01,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, 0.5 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xe1,0x01,0x00] +# W32: v_sub_co_u32 v5, s0, v1, 0.5 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xe1,0x01,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, 0.5 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xe1,0x01,0x00] 0x05,0x00,0x10,0xd7,0x01,0xe1,0x01,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, exec_hi ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xff,0x00,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, exec_hi ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xff,0x00,0x00] +# W32: v_sub_co_u32 v5, s0, v1, exec_hi ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xff,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, exec_hi ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x10,0xd7,0x01,0xff,0x00,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, exec_lo ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xfd,0x00,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, exec_lo ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xfd,0x00,0x00] +# W32: v_sub_co_u32 v5, s0, v1, exec_lo ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xfd,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, exec_lo ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x10,0xd7,0x01,0xfd,0x00,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, m0 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xf9,0x00,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, m0 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xf9,0x00,0x00] +# W32: v_sub_co_u32 v5, s0, v1, m0 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xf9,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, m0 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x10,0xd7,0x01,0xf9,0x00,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, s103 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xcf,0x00,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, s103 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xcf,0x00,0x00] +# W32: v_sub_co_u32 v5, s0, v1, s103 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xcf,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, s103 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xcf,0x00,0x00] 0x05,0x00,0x10,0xd7,0x01,0xcf,0x00,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, s2 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x05,0x00,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, s2 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x05,0x00,0x00] +# W32: v_sub_co_u32 v5, s0, v1, s2 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x05,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, s2 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x10,0xd7,0x01,0x05,0x00,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, ttmp11 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xef,0x00,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, ttmp11 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xef,0x00,0x00] +# W32: v_sub_co_u32 v5, s0, v1, ttmp11 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xef,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, ttmp11 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xef,0x00,0x00] 0x05,0x00,0x10,0xd7,0x01,0xef,0x00,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x05,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x05,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, v1, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x05,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x10,0xd7,0x01,0x05,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, v255 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xff,0x03,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, v255 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xff,0x03,0x00] +# W32: v_sub_co_u32 v5, s0, v1, v255 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xff,0x03,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, v255 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x10,0xd7,0x01,0xff,0x03,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, vcc_hi ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xd7,0x00,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, vcc_hi ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xd7,0x00,0x00] +# W32: v_sub_co_u32 v5, s0, v1, vcc_hi ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xd7,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, vcc_hi ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x10,0xd7,0x01,0xd7,0x00,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v1, vcc_lo ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xd5,0x00,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v1, vcc_lo ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xd5,0x00,0x00] +# W32: v_sub_co_u32 v5, s0, v1, vcc_lo ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xd5,0x00,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v1, vcc_lo ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x10,0xd7,0x01,0xd5,0x00,0x00 -# W32: v_sub_co_u32_e64 v5, s0, v255, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xff,0x05,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], v255, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xff,0x05,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, v255, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xff,0x05,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], v255, v2 ; encoding: [0x05,0x00,0x10,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x10,0xd7,0xff,0x05,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, vcc_hi, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x6b,0x04,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], vcc_hi, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x6b,0x04,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, vcc_hi, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x6b,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], vcc_hi, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x10,0xd7,0x6b,0x04,0x02,0x00 -# W32: v_sub_co_u32_e64 v5, s0, vcc_lo, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x6a,0x04,0x02,0x00] -# W64: v_sub_co_u32_e64 v5, s[0:1], vcc_lo, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x6a,0x04,0x02,0x00] +# W32: v_sub_co_u32 v5, s0, vcc_lo, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x6a,0x04,0x02,0x00] +# W64: v_sub_co_u32 v5, s[0:1], vcc_lo, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x10,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_sub_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x67,0x01,0xe4,0x00,0x00] @@ -95448,85 +95448,85 @@ # GFX10: v_sub_nc_i32 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x76,0xd7,0x02,0x07,0x02,0x00] 0x01,0x80,0x76,0xd7,0x02,0x07,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_sub_nc_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x04,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, -1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0x04,0x02,0x00] +# GFX10: v_sub_nc_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x04,0xd7,0xc1,0x04,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_sub_nc_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x04,0xd7,0xf7,0x04,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, 0, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x80,0x04,0x02,0x00] +# GFX10: v_sub_nc_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x04,0xd7,0x80,0x04,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_sub_nc_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x04,0xd7,0xf0,0x04,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0x04,0x02,0x00] +# GFX10: v_sub_nc_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x04,0xd7,0x7f,0x04,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x04,0x02,0x00] +# GFX10: v_sub_nc_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x04,0xd7,0x7e,0x04,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, m0, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0x04,0x02,0x00] +# GFX10: v_sub_nc_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x04,0xd7,0x7c,0x04,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, s1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x02,0x00] +# GFX10: v_sub_nc_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x04,0xd7,0x01,0x04,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, s101, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x65,0x04,0x02,0x00] +# GFX10: v_sub_nc_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x65,0x04,0x02,0x00] 0x05,0x00,0x04,0xd7,0x65,0x04,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v1, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x83,0x01,0x00] +# GFX10: v_sub_nc_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x04,0xd7,0x01,0x83,0x01,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] +# GFX10: v_sub_nc_u16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xff,0x01,0x00,0x00,0xc4,0x00,0x00] 0x05,0x00,0x04,0xd7,0x01,0xef,0x01,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v1, 0 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x01,0x01,0x00] +# GFX10: v_sub_nc_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x04,0xd7,0x01,0x01,0x01,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] +# GFX10: v_sub_nc_u16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xff,0x01,0x00,0x00,0x38,0x00,0x00] 0x05,0x00,0x04,0xd7,0x01,0xe1,0x01,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xff,0x00,0x00] +# GFX10: v_sub_nc_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x04,0xd7,0x01,0xff,0x00,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xfd,0x00,0x00] +# GFX10: v_sub_nc_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x04,0xd7,0x01,0xfd,0x00,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v1, m0 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xf9,0x00,0x00] +# GFX10: v_sub_nc_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x04,0xd7,0x01,0xf9,0x00,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v1, s101 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xcb,0x00,0x00] +# GFX10: v_sub_nc_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xcb,0x00,0x00] 0x05,0x00,0x04,0xd7,0x01,0xcb,0x00,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x00,0x00] +# GFX10: v_sub_nc_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x04,0xd7,0x01,0x05,0x00,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] +# GFX10: v_sub_nc_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x04,0xd7,0x01,0x05,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xff,0x03,0x00] +# GFX10: v_sub_nc_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x04,0xd7,0x01,0xff,0x03,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xd7,0x00,0x00] +# GFX10: v_sub_nc_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x04,0xd7,0x01,0xd7,0x00,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xd5,0x00,0x00] +# GFX10: v_sub_nc_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x04,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x04,0xd7,0x01,0xd5,0x00,0x00 -# GFX10: v_sub_nc_u16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0x05,0x02,0x00] +# GFX10: v_sub_nc_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x04,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x04,0xd7,0xff,0x05,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0x04,0x02,0x00] +# GFX10: v_sub_nc_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x04,0xd7,0x6b,0x04,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_sub_nc_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x04,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x04,0xd7,0x6a,0x04,0x02,0x00 -# GFX10: v_sub_nc_u16_e64 v5, vcc_lo, v2 clamp ; encoding: [0x05,0x80,0x04,0xd7,0x6a,0x04,0x02,0x00] +# GFX10: v_sub_nc_u16 v5, vcc_lo, v2 clamp ; encoding: [0x05,0x80,0x04,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x80,0x04,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_sub_nc_u32_e32 v255, v1, v2 ; encoding: [0x01,0x05,0xfe,0x4d] @@ -95972,116 +95972,116 @@ # W64: v_subrev_co_ci_u32_sdwa v5, vcc, v255, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x54,0xff,0x06,0x06,0x06] 0xf9,0x04,0x0a,0x54,0xff,0x06,0x06,0x06 -# W32: v_subrev_co_u32_e64 v255, s0, v1, v2 ; encoding: [0xff,0x00,0x19,0xd7,0x01,0x05,0x02,0x00] -# W64: v_subrev_co_u32_e64 v255, s[0:1], v1, v2 ; encoding: [0xff,0x00,0x19,0xd7,0x01,0x05,0x02,0x00] +# W32: v_subrev_co_u32 v255, s0, v1, v2 ; encoding: [0xff,0x00,0x19,0xd7,0x01,0x05,0x02,0x00] +# W64: v_subrev_co_u32 v255, s[0:1], v1, v2 ; encoding: [0xff,0x00,0x19,0xd7,0x01,0x05,0x02,0x00] 0xff,0x00,0x19,0xd7,0x01,0x05,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, -1, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xc1,0x04,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], -1, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xc1,0x04,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, -1, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xc1,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], -1, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xc1,0x04,0x02,0x00] 0x05,0x00,0x19,0xd7,0xc1,0x04,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, -4.0, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xf7,0x04,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], -4.0, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xf7,0x04,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, -4.0, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xf7,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], -4.0, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xf7,0x04,0x02,0x00] 0x05,0x00,0x19,0xd7,0xf7,0x04,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, 0, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x80,0x04,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], 0, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x80,0x04,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, 0, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x80,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], 0, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x80,0x04,0x02,0x00] 0x05,0x00,0x19,0xd7,0x80,0x04,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, 0.5, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xf0,0x04,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], 0.5, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xf0,0x04,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, 0.5, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xf0,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], 0.5, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xf0,0x04,0x02,0x00] 0x05,0x00,0x19,0xd7,0xf0,0x04,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, exec_hi, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x7f,0x04,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], exec_hi, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x7f,0x04,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, exec_hi, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x7f,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], exec_hi, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x7f,0x04,0x02,0x00] 0x05,0x00,0x19,0xd7,0x7f,0x04,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, exec_lo, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x7e,0x04,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], exec_lo, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x7e,0x04,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, exec_lo, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x7e,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], exec_lo, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x7e,0x04,0x02,0x00] 0x05,0x00,0x19,0xd7,0x7e,0x04,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, m0, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x7c,0x04,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], m0, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x7c,0x04,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, m0, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x7c,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], m0, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x7c,0x04,0x02,0x00] 0x05,0x00,0x19,0xd7,0x7c,0x04,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, s1, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x04,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], s1, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x04,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, s1, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], s1, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x04,0x02,0x00] 0x05,0x00,0x19,0xd7,0x01,0x04,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, s103, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x67,0x04,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], s103, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x67,0x04,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, s103, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x67,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], s103, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x67,0x04,0x02,0x00] 0x05,0x00,0x19,0xd7,0x67,0x04,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, ttmp11, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x77,0x04,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], ttmp11, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x77,0x04,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, ttmp11, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x77,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], ttmp11, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x77,0x04,0x02,0x00] 0x05,0x00,0x19,0xd7,0x77,0x04,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, -1 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x83,0x01,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, -1 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x83,0x01,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, -1 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x83,0x01,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, -1 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x83,0x01,0x00] 0x05,0x00,0x19,0xd7,0x01,0x83,0x01,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, -4.0 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xef,0x01,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, -4.0 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xef,0x01,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, -4.0 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xef,0x01,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, -4.0 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xef,0x01,0x00] 0x05,0x00,0x19,0xd7,0x01,0xef,0x01,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, 0 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x01,0x01,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, 0 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x01,0x01,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, 0 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x01,0x01,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, 0 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x01,0x01,0x00] 0x05,0x00,0x19,0xd7,0x01,0x01,0x01,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, 0.5 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xe1,0x01,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, 0.5 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xe1,0x01,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, 0.5 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xe1,0x01,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, 0.5 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xe1,0x01,0x00] 0x05,0x00,0x19,0xd7,0x01,0xe1,0x01,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, exec_hi ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xff,0x00,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, exec_hi ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xff,0x00,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, exec_hi ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xff,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, exec_hi ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xff,0x00,0x00] 0x05,0x00,0x19,0xd7,0x01,0xff,0x00,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, exec_lo ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xfd,0x00,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, exec_lo ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xfd,0x00,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, exec_lo ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xfd,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, exec_lo ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xfd,0x00,0x00] 0x05,0x00,0x19,0xd7,0x01,0xfd,0x00,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, m0 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xf9,0x00,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, m0 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xf9,0x00,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, m0 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xf9,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, m0 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xf9,0x00,0x00] 0x05,0x00,0x19,0xd7,0x01,0xf9,0x00,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, s103 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xcf,0x00,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, s103 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xcf,0x00,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, s103 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xcf,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, s103 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xcf,0x00,0x00] 0x05,0x00,0x19,0xd7,0x01,0xcf,0x00,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, s2 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x05,0x00,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, s2 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x05,0x00,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, s2 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x05,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, s2 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x05,0x00,0x00] 0x05,0x00,0x19,0xd7,0x01,0x05,0x00,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, ttmp11 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xef,0x00,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, ttmp11 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xef,0x00,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, ttmp11 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xef,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, ttmp11 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xef,0x00,0x00] 0x05,0x00,0x19,0xd7,0x01,0xef,0x00,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x05,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x05,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x05,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x19,0xd7,0x01,0x05,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, v255 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xff,0x03,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, v255 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xff,0x03,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, v255 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xff,0x03,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, v255 ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xff,0x03,0x00] 0x05,0x00,0x19,0xd7,0x01,0xff,0x03,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, vcc_hi ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xd7,0x00,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, vcc_hi ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xd7,0x00,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, vcc_hi ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xd7,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, vcc_hi ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xd7,0x00,0x00] 0x05,0x00,0x19,0xd7,0x01,0xd7,0x00,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v1, vcc_lo ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xd5,0x00,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v1, vcc_lo ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xd5,0x00,0x00] +# W32: v_subrev_co_u32 v5, s0, v1, vcc_lo ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xd5,0x00,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v1, vcc_lo ; encoding: [0x05,0x00,0x19,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x19,0xd7,0x01,0xd5,0x00,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, v255, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xff,0x05,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], v255, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xff,0x05,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, v255, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xff,0x05,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], v255, v2 ; encoding: [0x05,0x00,0x19,0xd7,0xff,0x05,0x02,0x00] 0x05,0x00,0x19,0xd7,0xff,0x05,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, vcc_hi, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x6b,0x04,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], vcc_hi, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x6b,0x04,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, vcc_hi, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x6b,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], vcc_hi, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x6b,0x04,0x02,0x00] 0x05,0x00,0x19,0xd7,0x6b,0x04,0x02,0x00 -# W32: v_subrev_co_u32_e64 v5, s0, vcc_lo, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x6a,0x04,0x02,0x00] -# W64: v_subrev_co_u32_e64 v5, s[0:1], vcc_lo, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x6a,0x04,0x02,0x00] +# W32: v_subrev_co_u32 v5, s0, vcc_lo, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x6a,0x04,0x02,0x00] +# W64: v_subrev_co_u32 v5, s[0:1], vcc_lo, v2 ; encoding: [0x05,0x00,0x19,0xd7,0x6a,0x04,0x02,0x00] 0x05,0x00,0x19,0xd7,0x6a,0x04,0x02,0x00 # GFX10: v_subrev_f16_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0xfe,0x69,0x01,0xe4,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt --- a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt @@ -644,112 +644,112 @@ # GFX90A: v_fmac_f64_e64 v[4:5], v[2:3], v[8:9] div:2 ; encoding: [0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x18] 0x04,0x00,0x04,0xd1,0x02,0x11,0x02,0x18 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x00] 0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v255, v1, v2 ; encoding: [0xff,0x00,0xa1,0xd2,0x01,0x05,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v255, v1, v2 ; encoding: [0xff,0x00,0xa1,0xd2,0x01,0x05,0x02,0x00] 0xff,0x00,0xa1,0xd2,0x01,0x05,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v255, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xff,0x05,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, v255, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xff,0x05,0x02,0x00] 0x05,0x00,0xa1,0xd2,0xff,0x05,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, s1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x04,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, s1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x04,0x02,0x00] 0x05,0x00,0xa1,0xd2,0x01,0x04,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, s101, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x65,0x04,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, s101, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x65,0x04,0x02,0x00] 0x05,0x00,0xa1,0xd2,0x65,0x04,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x6a,0x04,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x6a,0x04,0x02,0x00] 0x05,0x00,0xa1,0xd2,0x6a,0x04,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x6b,0x04,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x6b,0x04,0x02,0x00] 0x05,0x00,0xa1,0xd2,0x6b,0x04,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, m0, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x7c,0x04,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, m0, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x7c,0x04,0x02,0x00] 0x05,0x00,0xa1,0xd2,0x7c,0x04,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, exec_lo, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x7e,0x04,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, exec_lo, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x7e,0x04,0x02,0x00] 0x05,0x00,0xa1,0xd2,0x7e,0x04,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, exec_hi, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x7f,0x04,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, exec_hi, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x7f,0x04,0x02,0x00] 0x05,0x00,0xa1,0xd2,0x7f,0x04,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, 0, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x80,0x04,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, 0, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x80,0x04,0x02,0x00] 0x05,0x00,0xa1,0xd2,0x80,0x04,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, -1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xc1,0x04,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, -1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xc1,0x04,0x02,0x00] 0x05,0x00,0xa1,0xd2,0xc1,0x04,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, 0.5, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xf0,0x04,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, 0.5, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xf0,0x04,0x02,0x00] 0x05,0x00,0xa1,0xd2,0xf0,0x04,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, -4.0, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xf7,0x04,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, -4.0, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0xf7,0x04,0x02,0x00] 0x05,0x00,0xa1,0xd2,0xf7,0x04,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, v255 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xff,0x03,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, v255 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xff,0x03,0x00] 0x05,0x00,0xa1,0xd2,0x01,0xff,0x03,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, s2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x00,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, s2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x00,0x00] 0x05,0x00,0xa1,0xd2,0x01,0x05,0x00,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, s101 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xcb,0x00,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, s101 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xcb,0x00,0x00] 0x05,0x00,0xa1,0xd2,0x01,0xcb,0x00,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, vcc_lo ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xd5,0x00,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xd5,0x00,0x00] 0x05,0x00,0xa1,0xd2,0x01,0xd5,0x00,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, vcc_hi ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xd7,0x00,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xd7,0x00,0x00] 0x05,0x00,0xa1,0xd2,0x01,0xd7,0x00,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, m0 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xf9,0x00,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, m0 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xf9,0x00,0x00] 0x05,0x00,0xa1,0xd2,0x01,0xf9,0x00,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, exec_lo ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xfd,0x00,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, exec_lo ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xfd,0x00,0x00] 0x05,0x00,0xa1,0xd2,0x01,0xfd,0x00,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, exec_hi ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xff,0x00,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, exec_hi ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xff,0x00,0x00] 0x05,0x00,0xa1,0xd2,0x01,0xff,0x00,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, 0 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x01,0x01,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, 0 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x01,0x01,0x00] 0x05,0x00,0xa1,0xd2,0x01,0x01,0x01,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, -1 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x83,0x01,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, -1 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x83,0x01,0x00] 0x05,0x00,0xa1,0xd2,0x01,0x83,0x01,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, 0.5 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xe1,0x01,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, 0.5 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xe1,0x01,0x00] 0x05,0x00,0xa1,0xd2,0x01,0xe1,0x01,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, -4.0 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xef,0x01,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, -4.0 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0xef,0x01,0x00] 0x05,0x00,0xa1,0xd2,0x01,0xef,0x01,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, -v1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x20] +# GFX90A: v_mul_legacy_f32 v5, -v1, v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x20] 0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x20 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, -v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x40] +# GFX90A: v_mul_legacy_f32 v5, v1, -v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x40] 0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x40 -# GFX90A: v_mul_legacy_f32_e64 v5, -v1, -v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x60] +# GFX90A: v_mul_legacy_f32 v5, -v1, -v2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x60] 0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x60 -# GFX90A: v_mul_legacy_f32_e64 v5, |v1|, v2 ; encoding: [0x05,0x01,0xa1,0xd2,0x01,0x05,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, |v1|, v2 ; encoding: [0x05,0x01,0xa1,0xd2,0x01,0x05,0x02,0x00] 0x05,0x01,0xa1,0xd2,0x01,0x05,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, |v2| ; encoding: [0x05,0x02,0xa1,0xd2,0x01,0x05,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, |v2| ; encoding: [0x05,0x02,0xa1,0xd2,0x01,0x05,0x02,0x00] 0x05,0x02,0xa1,0xd2,0x01,0x05,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, |v1|, |v2| ; encoding: [0x05,0x03,0xa1,0xd2,0x01,0x05,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, |v1|, |v2| ; encoding: [0x05,0x03,0xa1,0xd2,0x01,0x05,0x02,0x00] 0x05,0x03,0xa1,0xd2,0x01,0x05,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, v2 clamp ; encoding: [0x05,0x80,0xa1,0xd2,0x01,0x05,0x02,0x00] +# GFX90A: v_mul_legacy_f32 v5, v1, v2 clamp ; encoding: [0x05,0x80,0xa1,0xd2,0x01,0x05,0x02,0x00] 0x05,0x80,0xa1,0xd2,0x01,0x05,0x02,0x00 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, v2 mul:2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x08] +# GFX90A: v_mul_legacy_f32 v5, v1, v2 mul:2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x08] 0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x08 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, v2 mul:4 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x10] +# GFX90A: v_mul_legacy_f32 v5, v1, v2 mul:4 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x10] 0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x10 -# GFX90A: v_mul_legacy_f32_e64 v5, v1, v2 div:2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x18] +# GFX90A: v_mul_legacy_f32 v5, v1, v2 div:2 ; encoding: [0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x18] 0x05,0x00,0xa1,0xd2,0x01,0x05,0x02,0x18 # GFX90A: v_xor_b32_dpp v6, v29, v27 row_newbcast:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x36,0x0c,0x2a,0x1d,0x50,0x01,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt b/llvm/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt --- a/llvm/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt @@ -145,5 +145,5 @@ #===----------------------------------------------------------------------===// # FIXME: v_pk_fmac_f16 cannot be promoted to VOP3 so '_e32' suffix is not valid -# GFX10: v_pk_fmac_f16_e32 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12] +# GFX10: v_pk_fmac_f16 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12] 0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12 diff --git a/llvm/test/MC/Disassembler/AMDGPU/vop3-literal.txt b/llvm/test/MC/Disassembler/AMDGPU/vop3-literal.txt --- a/llvm/test/MC/Disassembler/AMDGPU/vop3-literal.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/vop3-literal.txt @@ -15,10 +15,10 @@ # GFX10: v_bfe_u32 v0, s1, 0x3039, s1 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0xfe,0x05,0x00,0x39,0x30,0x00,0x00] 0x00,0x00,0x48,0xd5,0x01,0xfe,0x05,0x00,0x39,0x30,0x00,0x00 -# GFX10: v_bfm_b32_e64 v0, 0x3039, s1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x00,0x00,0x39,0x30,0x00,0x00] +# GFX10: v_bfm_b32 v0, 0x3039, s1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x00,0x00,0x39,0x30,0x00,0x00] 0x00,0x00,0x63,0xd7,0xff,0x02,0x00,0x00,0x39,0x30,0x00,0x00 -# GFX10: v_bfm_b32_e64 v0, 0x3039, v1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x02,0x00,0x39,0x30,0x00,0x00] +# GFX10: v_bfm_b32 v0, 0x3039, v1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x02,0x00,0x39,0x30,0x00,0x00] 0x00,0x00,0x63,0xd7,0xff,0x02,0x02,0x00,0x39,0x30,0x00,0x00 # GFX10: v_pk_add_f16 v1, 0x4e40, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0x4e,0x00,0x00] @@ -54,5 +54,5 @@ # GFX10: v_ceil_f16_e64 v255, 0xabcd clamp ; encoding: [0xff,0x80,0xdc,0xd5,0xff,0x00,0x00,0x00,0xcd,0xab,0xff,0xff] 0xff,0x80,0xdc,0xd5,0xff,0x00,0x00,0x00,0xcd,0xab,0xff,0xff -# GFX10: v_min_u16_e64 v5, v1, 0xabcd ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x01,0x00,0xcd,0xab,0xff,0xff] +# GFX10: v_min_u16 v5, v1, 0xabcd ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0xff,0x01,0x00,0xcd,0xab,0xff,0xff] 0x05,0x00,0x0b,0xd7,0x01,0xff,0x01,0x00,0xcd,0xab,0xff,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/wave32.txt b/llvm/test/MC/Disassembler/AMDGPU/wave32.txt --- a/llvm/test/MC/Disassembler/AMDGPU/wave32.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/wave32.txt @@ -45,20 +45,20 @@ # GFX1064: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00 -# GFX1032: v_add_co_u32_e64 v2, vcc_lo, s0, v2 -# GFX1064: v_add_co_u32_e64 v2, vcc, s0, v2 +# GFX1032: v_add_co_u32 v2, vcc_lo, s0, v2 +# GFX1064: v_add_co_u32 v2, vcc, s0, v2 0x02,0x6a,0x0f,0xd7,0x00,0x04,0x02,0x00 # GFX1032: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo # GFX1064: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; 0x03,0x09,0x06,0x50 -# GFX1032: v_sub_co_u32_e64 v2, vcc_lo, s0, v2 -# GFX1064: v_sub_co_u32_e64 v2, vcc, s0, v2 +# GFX1032: v_sub_co_u32 v2, vcc_lo, s0, v2 +# GFX1064: v_sub_co_u32 v2, vcc, s0, v2 0x02,0x6a,0x10,0xd7,0x00,0x04,0x02,0x00 -# GFX1032: v_subrev_co_u32_e64 v2, vcc_lo, s0, v2 -# GFX1064: v_subrev_co_u32_e64 v2, vcc, s0, v2 +# GFX1032: v_subrev_co_u32 v2, vcc_lo, s0, v2 +# GFX1064: v_subrev_co_u32 v2, vcc, s0, v2 0x02,0x6a,0x19,0xd7,0x00,0x04,0x02,0x00 # GFX1032: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo @@ -107,24 +107,24 @@ # gfx1064: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 # 0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00 -# GFX1032: v_add_co_u32_e64 v0, s0, v0, v2 -# GFX1064: v_add_co_u32_e64 v0, s[0:1], v0, v2 +# GFX1032: v_add_co_u32 v0, s0, v0, v2 +# GFX1064: v_add_co_u32 v0, s[0:1], v0, v2 0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00 # GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2 # GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] 0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00 -# GFX1032: v_sub_co_u32_e64 v0, s0, v0, v2 -# GFX1064: v_sub_co_u32_e64 v0, s[0:1], v0, v2 +# GFX1032: v_sub_co_u32 v0, s0, v0, v2 +# GFX1064: v_sub_co_u32 v0, s[0:1], v0, v2 0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00 # GFX1032: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2 # GFX1064: v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] 0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00 -# GFX1032: v_subrev_co_u32_e64 v0, s0, v0, v2 -# GFX1064: v_subrev_co_u32_e64 v0, s[0:1], v0, v2 +# GFX1032: v_subrev_co_u32 v0, s0, v0, v2 +# GFX1064: v_subrev_co_u32 v0, s[0:1], v0, v2 0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00 # GFX1032: v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2