Index: lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- lib/Target/AMDGPU/VOP2Instructions.td +++ lib/Target/AMDGPU/VOP2Instructions.td @@ -657,6 +657,17 @@ VOP3e_vi (NAME#"_e64").Pfl>; } +multiclass VOP2_Real_e64only_vi op> { + def _e64_vi : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, + VOP3e_vi (NAME#"_e64").Pfl> { + // Hack to stop printing _e64 + VOP3_Pseudo ps = !cast(NAME#"_e64"); + let OutOperandList = (outs VGPR_32:$vdst); + let AsmString = ps.Mnemonic # " " # ps.AsmOperands; + } +} + multiclass Base_VOP2be_Real_e32e64_vi op> : VOP2_Real_e32_vi { def _e64_vi : VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, @@ -724,17 +735,17 @@ defm V_READLANE_B32 : VOP32_Real_vi <0x289>; defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>; -defm V_BFM_B32 : VOP2_Real_e64_vi <0x293>; -defm V_BCNT_U32_B32 : VOP2_Real_e64_vi <0x28b>; -defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64_vi <0x28c>; -defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64_vi <0x28d>; -defm V_LDEXP_F32 : VOP2_Real_e64_vi <0x288>; -defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64_vi <0x1f0>; -defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64_vi <0x294>; -defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64_vi <0x295>; -defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64_vi <0x296>; -defm V_CVT_PK_U16_U32 : VOP2_Real_e64_vi <0x297>; -defm V_CVT_PK_I16_I32 : VOP2_Real_e64_vi <0x298>; +defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; +defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>; +defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>; +defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>; +defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>; +defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>; +defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>; +defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>; +defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>; +defm V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>; +defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>; defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>; defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>; Index: test/CodeGen/AMDGPU/constant-fold-mi-operands.ll =================================================================== --- test/CodeGen/AMDGPU/constant-fold-mi-operands.ll +++ test/CodeGen/AMDGPU/constant-fold-mi-operands.ll @@ -25,7 +25,7 @@ } ; GCN-LABEL: {{^}}fold_mi_v_or_0: -; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]] +; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]] ; GCN-NOT: [[RESULT]] ; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @fold_mi_v_or_0(i32 addrspace(1)* %out) { @@ -50,7 +50,7 @@ } ; GCN-LABEL: {{^}}fold_mi_v_xor_0: -; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]] +; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]] ; GCN-NOT: [[RESULT]] ; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @fold_mi_v_xor_0(i32 addrspace(1)* %out) { @@ -86,8 +86,8 @@ } ; GCN-LABEL: {{^}}fold_mi_v_not_0: -; GCN: v_bcnt_u32_b32_e64 v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}} -; GCN: v_bcnt_u32_b32_e{{[0-9]+}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}} +; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}} ; GCN-NEXT: v_not_b32_e32 v[[RESULT_LO]] ; GCN-NEXT: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], -1{{$}} ; GCN-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} @@ -104,8 +104,8 @@ ; GCN: buffer_load_dwordx2 ; GCN: buffer_load_dwordx2 v{{\[}}[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]{{\]}} -; GCN: v_bcnt_u32_b32_e64 v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}} -; GCN: v_bcnt_u32_b32_e{{[0-9]+}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}} +; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}} ; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]] ; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[VREG1_LO]], v[[RESULT_LO]] ; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]] Index: test/CodeGen/AMDGPU/ctpop.ll =================================================================== --- test/CodeGen/AMDGPU/ctpop.ll +++ test/CodeGen/AMDGPU/ctpop.ll @@ -25,7 +25,7 @@ ; XXX - Why 0 in register? ; FUNC-LABEL: {{^}}v_ctpop_i32: ; GCN: buffer_load_dword [[VAL:v[0-9]+]], -; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0 +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 0 ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -40,9 +40,9 @@ ; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32: ; GCN: buffer_load_dword [[VAL1:v[0-9]+]], ; GCN: buffer_load_dword [[VAL0:v[0-9]+]], -; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0 +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], [[VAL1]], 0 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] -; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] +; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -61,7 +61,7 @@ ; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32: ; GCN: buffer_load_dword [[VAL0:v[0-9]+]], ; GCN: s_waitcnt -; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}} +; GCN-NEXT: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}} ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind { @@ -73,8 +73,8 @@ } ; FUNC-LABEL: {{^}}v_ctpop_v2i32: -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} ; GCN: s_endpgm ; EG: BCNT_INT @@ -87,10 +87,10 @@ } ; FUNC-LABEL: {{^}}v_ctpop_v4i32: -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} ; GCN: s_endpgm ; EG: BCNT_INT @@ -105,14 +105,14 @@ } ; FUNC-LABEL: {{^}}v_ctpop_v8i32: -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} ; GCN: s_endpgm ; EG: BCNT_INT @@ -131,22 +131,22 @@ } ; FUNC-LABEL: {{^}}v_ctpop_v16i32: -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} ; GCN: s_endpgm ; EG: BCNT_INT @@ -174,7 +174,7 @@ ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant: ; GCN: buffer_load_dword [[VAL:v[0-9]+]], -; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4 ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -189,7 +189,7 @@ ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv: ; GCN: buffer_load_dword [[VAL:v[0-9]+]], -; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4 ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -206,7 +206,7 @@ ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], ; GCN-DAG: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] -; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] +; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { @@ -220,7 +220,7 @@ ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var: ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], ; GCN-DAG: s_load_dword [[VAR:s[0-9]+]], -; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -236,7 +236,7 @@ ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv: ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], ; GCN-DAG: s_load_dword [[VAR:s[0-9]+]], -; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -253,7 +253,7 @@ ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], {{0$}} ; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] -; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm Index: test/CodeGen/AMDGPU/ctpop64.ll =================================================================== --- test/CodeGen/AMDGPU/ctpop64.ll +++ test/CodeGen/AMDGPU/ctpop64.ll @@ -26,9 +26,9 @@ ; FUNC-LABEL: {{^}}v_ctpop_i64: ; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, -; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] -; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { @@ -41,9 +41,9 @@ ; FUNC-LABEL: {{^}}v_ctpop_i64_user: ; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, -; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] -; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] ; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]] ; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}} ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} @@ -171,11 +171,11 @@ ; FUNC-LABEL: {{^}}v_ctpop_i128: ; GCN: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: v_bcnt_u32_b32_e64 [[MIDRESULT0:v[0-9]+]], v{{[0-9]+}}, 0 -; GCN-DAG: v_bcnt_u32_b32{{_e32|_e64}} [[MIDRESULT1:v[0-9]+]], v[[VAL3]], [[MIDRESULT0]] +; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT0:v[0-9]+]], v{{[0-9]+}}, 0 +; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT1:v[0-9]+]], v[[VAL3]], [[MIDRESULT0]] -; GCN-DAG: v_bcnt_u32_b32_e64 [[MIDRESULT2:v[0-9]+]], v[[VAL0]], 0 -; GCN-DAG: v_bcnt_u32_b32{{_e32|_e64}} [[MIDRESULT3:v[0-9]+]], v{{[0-9]+}}, [[MIDRESULT2]] +; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT2:v[0-9]+]], v[[VAL0]], 0 +; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT3:v[0-9]+]], v{{[0-9]+}}, [[MIDRESULT2]] ; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, [[MIDRESULT1]], [[MIDRESULT2]] Index: test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll @@ -7,7 +7,7 @@ ; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}} ; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]] ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[X]], [[VY]] -; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[VY]] +; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, [[X]], [[VY]] define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 { %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y) store <2 x half> %result, <2 x half> addrspace(1)* %out @@ -16,7 +16,7 @@ ; GCN-LABEL: {{^}}s_cvt_pkrtz_samereg_v2f16_f32: ; GCN: s_load_dword [[X:s[0-9]+]] -; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[X]] +; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]] define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(<2 x half> addrspace(1)* %out, float %x) #0 { %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %x) store <2 x half> %result, <2 x half> addrspace(1)* %out @@ -39,7 +39,7 @@ ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[A]], [[B]] -; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], [[B]] +; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, [[A]], [[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -55,7 +55,7 @@ ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_reg_imm: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] -; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], 1.0 +; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -70,7 +70,7 @@ ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_imm_reg: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, 1.0, [[A]] -; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, 1.0, [[A]] +; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, 1.0, [[A]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -85,7 +85,7 @@ ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] -; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -[[A]], [[B]] +; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -103,7 +103,7 @@ ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_hi: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] -; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], -[[B]] +; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -121,7 +121,7 @@ ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo_hi: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] -; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -[[A]], -[[B]] +; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -140,7 +140,7 @@ ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] -; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -|[[A]]|, -[[B]] +; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 Index: test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll @@ -2,9 +2,9 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}mbcnt_intrinsics: -; GCN: v_mbcnt_lo_u32_b32_e64 [[LO:v[0-9]+]], -1, 0 +; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[LO:v[0-9]+]], -1, 0 ; SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]] -; VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]] +; VI: v_mbcnt_hi_u32_b32 {{v[0-9]+}}, -1, [[LO]] define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3) { main_body: %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 Index: test/MC/AMDGPU/vop2.s =================================================================== --- test/MC/AMDGPU/vop2.s +++ test/MC/AMDGPU/vop2.s @@ -243,7 +243,7 @@ v_xor_b32_e32 v1, v2, v3 // SICI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x3c,0xd2,0x02,0x07,0x02,0x00] -// VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00] +// VI: v_bfm_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00] v_bfm_b32_e64 v1, v2, v3 // SICI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e] @@ -259,15 +259,15 @@ v_madak_f32 v1, v2, v3, 64.0 // SICI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00] -// VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00] +// VI: v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00] v_bcnt_u32_b32_e64 v1, v2, v3 // SICI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x46,0xd2,0x02,0x07,0x02,0x00] -// VI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00] +// VI: v_mbcnt_lo_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00] v_mbcnt_lo_u32_b32_e64 v1, v2, v3 // SICI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x48,0xd2,0x02,0x07,0x02,0x00] -// VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00] +// VI: v_mbcnt_hi_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00] v_mbcnt_hi_u32_b32_e64 v1, v2, v3 // SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a] @@ -376,31 +376,31 @@ v_subbrev_u32 v1, s[0:1], v2, v3, vcc // SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56] -// VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00] +// VI: v_ldexp_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00] v_ldexp_f32 v1, v2, v3 // SICI: v_cvt_pkaccum_u8_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x58] -// VI: v_cvt_pkaccum_u8_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0xf0,0xd1,0x02,0x07,0x02,0x00] +// VI: v_cvt_pkaccum_u8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xf0,0xd1,0x02,0x07,0x02,0x00] v_cvt_pkaccum_u8_f32 v1, v2, v3 // SICI: v_cvt_pknorm_i16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5a] -// VI: v_cvt_pknorm_i16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x94,0xd2,0x02,0x07,0x02,0x00] +// VI: v_cvt_pknorm_i16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x94,0xd2,0x02,0x07,0x02,0x00] v_cvt_pknorm_i16_f32 v1, v2, v3 // SICI: v_cvt_pknorm_u16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5c] -// VI: v_cvt_pknorm_u16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x95,0xd2,0x02,0x07,0x02,0x00] +// VI: v_cvt_pknorm_u16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x95,0xd2,0x02,0x07,0x02,0x00] v_cvt_pknorm_u16_f32 v1, v2, v3 // SICI: v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x5e] -// VI: v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00] +// VI: v_cvt_pkrtz_f16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00] v_cvt_pkrtz_f16_f32 v1, v2, v3 // SICI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x60,0xd2,0x02,0x07,0x02,0x00] -// VI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00] +// VI: v_cvt_pk_u16_u32 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00] v_cvt_pk_u16_u32_e64 v1, v2, v3 // SICI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x62,0xd2,0x02,0x07,0x02,0x00] -// VI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00] +// VI: v_cvt_pk_i16_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00] v_cvt_pk_i16_i32_e64 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU Index: test/MC/AMDGPU/vop3-convert.s =================================================================== --- test/MC/AMDGPU/vop3-convert.s +++ test/MC/AMDGPU/vop3-convert.s @@ -288,31 +288,31 @@ v_xor_b32 v1, v2, v3 // SICI: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c] -// VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00] +// VI: v_bfm_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00] v_bfm_b32 v1, v2, v3 // SICI: v_bcnt_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x44] -// VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00] +// VI: v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00] v_bcnt_u32_b32 v1, v2, v3 // SICI: v_mbcnt_lo_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46] -// VI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00] +// VI: v_mbcnt_lo_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00] v_mbcnt_lo_u32_b32 v1, v2, v3 // SICI: v_mbcnt_hi_u32_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x48] -// VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00] +// VI: v_mbcnt_hi_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00] v_mbcnt_hi_u32_b32 v1, v2, v3 // SICI: v_cvt_pk_u16_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x60] -// VI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00] +// VI: v_cvt_pk_u16_u32 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00] v_cvt_pk_u16_u32 v1, v2, v3 // SICI: v_cvt_pk_i16_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x62] -// VI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00] +// VI: v_cvt_pk_i16_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00] v_cvt_pk_i16_i32 v1, v2, v3 // SICI: v_bfm_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c] -// VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00] +// VI: v_bfm_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00] v_bfm_b32 v1, v2, v3 // NOSICI: error: instruction not supported on this GPU Index: test/MC/Disassembler/AMDGPU/vop2_vi.txt =================================================================== --- test/MC/Disassembler/AMDGPU/vop2_vi.txt +++ test/MC/Disassembler/AMDGPU/vop2_vi.txt @@ -72,7 +72,7 @@ # VI: v_xor_b32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2a] 0x02 0x07 0x02 0x2a -# VI: v_bfm_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00] +# VI: v_bfm_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x93,0xd2,0x02,0x07,0x02,0x00] 0x01 0x00 0x93 0xd2 0x02 0x07 0x02 0x00 # VI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c] @@ -84,13 +84,13 @@ # VI: v_madak_f32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42] 0x02 0x07 0x02 0x30 0x00 0x00 0x80 0x42 -# VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00] +# VI: v_bcnt_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00] 0x01 0x00 0x8b 0xd2 0x02 0x07 0x02 0x00 -# VI: v_mbcnt_lo_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00] +# VI: v_mbcnt_lo_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8c,0xd2,0x02,0x07,0x02,0x00] 0x01 0x00 0x8c 0xd2 0x02 0x07 0x02 0x00 -# VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00] +# VI: v_mbcnt_hi_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00] 0x01 0x00 0x8d 0xd2 0x02 0x07 0x02 0x00 # VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32] @@ -171,25 +171,25 @@ # VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0xaa,0x01] 0x01 0x00 0x1e 0xd1 0x02 0x07 0xaa 0x01 -# VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00] +# VI: v_ldexp_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00] 0x01 0x00 0x88 0xd2 0x02 0x07 0x02 0x00 -# VI: v_cvt_pkaccum_u8_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0xf0,0xd1,0x02,0x07,0x02,0x00] +# VI: v_cvt_pkaccum_u8_f32 v1, v2, v3 ; encoding: [0x01,0x00,0xf0,0xd1,0x02,0x07,0x02,0x00] 0x01 0x00 0xf0 0xd1 0x02 0x07 0x02 0x00 -# VI: v_cvt_pknorm_i16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x94,0xd2,0x02,0x07,0x02,0x00] +# VI: v_cvt_pknorm_i16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x94,0xd2,0x02,0x07,0x02,0x00] 0x01 0x00 0x94 0xd2 0x02 0x07 0x02 0x00 -# VI: v_cvt_pknorm_u16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x95,0xd2,0x02,0x07,0x02,0x00] +# VI: v_cvt_pknorm_u16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x95,0xd2,0x02,0x07,0x02,0x00] 0x01 0x00 0x95 0xd2 0x02 0x07 0x02 0x00 -# VI: v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00] +# VI: v_cvt_pkrtz_f16_f32 v1, v2, v3 ; encoding: [0x01,0x00,0x96,0xd2,0x02,0x07,0x02,0x00] 0x01 0x00 0x96 0xd2 0x02 0x07 0x02 0x00 -# VI: v_cvt_pk_u16_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00] +# VI: v_cvt_pk_u16_u32 v1, v2, v3 ; encoding: [0x01,0x00,0x97,0xd2,0x02,0x07,0x02,0x00] 0x01 0x00 0x97 0xd2 0x02 0x07 0x02 0x00 -# VI: v_cvt_pk_i16_i32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00] +# VI: v_cvt_pk_i16_i32 v1, v2, v3 ; encoding: [0x01,0x00,0x98,0xd2,0x02,0x07,0x02,0x00] 0x01 0x00 0x98 0xd2 0x02 0x07 0x02 0x00 # VI: v_add_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3e]