Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -1083,6 +1083,11 @@
 
   field dag Outs = (outs DstRC:$dst);
 
+  // VOP3b instructions are a special case with a second explicit
+  // output. This is manually overridden for them.
+  field dag Outs32 = Outs;
+  field dag Outs64 = Outs;
+
   field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
   field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                              HasModifiers>.ret;
@@ -1118,7 +1123,19 @@
 def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
-def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
+
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
+  let Asm32 = "$dst, vcc, $src0, $src1";
+  let Asm64 = "$dst, $sdst, $src0, $src1";
+  let Outs32 = (outs DstRC:$dst);
+  let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
+}
+
+def VOP2b_I32_I1_I32_I32_VCC : VOPProfile<[i32, i32, i32, untyped]> {
+  let Asm32 = "$dst, vcc, $src0, $src1";
+  let Asm64 = "$dst, $sdst, $src0, $src1";
+  let Outs32 = (outs DstRC:$dst);
+  let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
   let Src0RC32 = VCSrc_32;
 }
@@ -1414,16 +1431,11 @@
   def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
            VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
 
-  // The VOP2 variant puts the carry out into VCC, the VOP3 variant
-  // can write it into any SGPR. We currently don't use the carry out,
-  // so for now hardcode it to VCC as well.
-  let sdst = SIOperand.VCC, Defs = [VCC] in {
-    def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
-              VOP3DisableFields<1, 0, HasMods>;
+  def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
+            VOP3DisableFields<1, 0, HasMods>;
 
-    def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
-              VOP3DisableFields<1, 0, HasMods>;
-  } // End sdst = SIOperand.VCC, Defs = [VCC]
+  def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
+            VOP3DisableFields<1, 0, HasMods>;
 }
@@ -1449,14 +1461,14 @@ multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
                         opName, revOp, HasMods>;
 }
 
-multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
+multiclass VOP2b_Helper <vop2 op, string opName, dag outs32, dag outs64,
                          dag ins32, string asm32, list<dag> pat32,
                          dag ins64, string asm64, list<dag> pat64,
                          string revOp, bit HasMods> {
-  defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
+  defm _e32 : VOP2_m <op, outs32, ins32, asm32, pat32, opName, revOp>;
 
-  defm _e64 : VOP3b_2_m <op, outs, ins64,
+  defm _e64 : VOP3b_2_m <op, outs64, ins64,
                          opName#"_e64"#asm64, pat64, opName, revOp, HasMods>;
 }
 
 multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
                       SDPatternOperator node = null_frag,
                       string revOp = opName> : VOP2b_Helper <
-  op, opName, P.Outs,
+  op, opName, P.Outs32, P.Outs64,
   P.Ins32, P.Asm32, [],
   P.Ins64, P.Asm64,
   !if(P.HasModifiers,
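The practical effect of the Outs32/Outs64 split above is that the carry-out register becomes an explicit operand in both encodings: the VOP2 (e32) form always prints the implied vcc, while the VOP3b (e64) form carries a separate SGPR-pair destination ($sdst). As a rough sketch of the resulting syntax for v_add_i32 (register choices are illustrative; the exact spellings and encodings are checked in the vop2.s hunk below):

    v_add_i32_e32 v1, vcc, v2, v3      // e32: carry-out is always vcc
    v_add_i32_e64 v1, s[0:1], v2, v3   // e64: carry-out can name any SGPR pair

Note that nothing selects a non-vcc sdst yet; this change only models the operand so the two forms can be distinguished.
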
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1509,23 +1509,23 @@
 
 // V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI,
 // but the VI instructions behave the same as the SI versions.
 defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
-  VOP_I32_I32_I32, add
+  VOP2b_I32_I1_I32_I32
 >;
-defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP_I32_I32_I32>;
+defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP2b_I32_I1_I32_I32>;
 defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
-  VOP_I32_I32_I32, null_frag, "v_sub_i32"
+  VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32"
 >;
 
 let Uses = [VCC] in { // Carry-in comes from VCC
 defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
-  VOP_I32_I32_I32_VCC
+  VOP2b_I32_I1_I32_I32_VCC
 >;
 defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
-  VOP_I32_I32_I32_VCC
+  VOP2b_I32_I1_I32_I32_VCC
 >;
 defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
-  VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
+  VOP2b_I32_I1_I32_I32_VCC, null_frag, "v_subb_u32"
 >;
 } // End Uses = [VCC]
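The three carry-in opcodes stay wrapped in Uses = [VCC] because their e32 forms still read the carry-in implicitly from vcc; only the carry-out became explicit in the asm string. The usual consumer is a 64-bit add split into a low/high pair, roughly like this sketch (register choices illustrative, using the e32 syntax introduced by this patch):

    v_add_i32_e32  v0, vcc, v2, v0   // low half: writes carry-out to vcc
    v_addc_u32_e32 v1, vcc, v3, v1   // high half: carry-in read implicitly from vcc
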
Index: test/CodeGen/AMDGPU/add.ll
===================================================================
--- test/CodeGen/AMDGPU/add.ll
+++ test/CodeGen/AMDGPU/add.ll
@@ -5,7 +5,7 @@
 ;FUNC-LABEL: {{^}}test1:
 ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 [[REG:v[0-9]+]], vcc, {{v[0-9]+, v[0-9]+}}
 ;SI-NOT: [[REG]]
 ;SI: buffer_store_dword [[REG]],
 define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
@@ -21,8 +21,8 @@
 ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
 define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@@ -39,10 +39,10 @@
 ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
 define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
Index: test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
===================================================================
--- test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
+++ test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
@@ -14,7 +14,7 @@
 ; FIXME: We end up with zero argument for ADD, because
 ; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
 ; with the appropriate offset. We should fold this into the store.
-; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
+; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 0, v{{[0-9]+}}
 ; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
 ;
 ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
@@ -22,7 +22,7 @@
 ; to interpret:
 ; getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
 
-; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16
+; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16
 ; SI-PROMOTE: ds_write_b32 [[PTRREG]]
 define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
   %alloca = alloca [4 x i32], i32 4, align 16
Index: test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
===================================================================
--- test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -154,7 +154,7 @@
 ; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
 ; SI: buffer_load_dword [[LOADREG:v[0-9]+]],
-; SI: v_add_i32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
+; SI: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
 ; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
 ; SI: buffer_store_dword [[CONV]],
 define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
Index: test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
===================================================================
--- test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
+++ test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
@@ -10,13 +10,13 @@
 ; CHECK: BB0_1:
 ; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]],
 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]]
-; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], vcc, 4, [[VADDR]]
 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR4]]
-; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], vcc, 0x80, [[VADDR]]
 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]]
-; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], vcc, 0x84, [[VADDR]]
 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x84]]
-; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, 0x100, [[VADDR]]
 ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
 
 ; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1
Index: test/CodeGen/AMDGPU/ds_read2st64.ll
===================================================================
--- test/CodeGen/AMDGPU/ds_read2st64.ll
+++ test/CodeGen/AMDGPU/ds_read2st64.ll
@@ -65,7 +65,7 @@
 ; SI-LABEL: @simple_read2st64_f32_over_max_offset
 ; SI-NOT: ds_read2st64_b32
-; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
 ; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
 ; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
 ; SI: s_endpgm
@@ -197,7 +197,7 @@
 ; SI-LABEL: @simple_read2st64_f64_over_max_offset
 ; SI-NOT: ds_read2st64_b64
-; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
 ; SI: s_endpgm
Index: test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll
+++ test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll
@@ -425,7 +425,7 @@
 ; SI: buffer_load_dword [[LOAD:v[0-9]+]]
 ; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
 ; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
-; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
+; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
 ; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
 ; SI: buffer_store_dword [[TMP2]]
 define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
Index: test/CodeGen/AMDGPU/local-memory-two-objects.ll
===================================================================
--- test/CodeGen/AMDGPU/local-memory-two-objects.ll
+++ test/CodeGen/AMDGPU/local-memory-two-objects.ll
@@ -30,7 +30,7 @@
 ; constant offsets.
 ; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
 ; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
+; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], vcc, 16, v{{[0-9]+}}
 ; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]]
 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16
 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]]
Index: test/CodeGen/AMDGPU/operand-folding.ll
===================================================================
--- test/CodeGen/AMDGPU/operand-folding.ll
+++ test/CodeGen/AMDGPU/operand-folding.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
 
 ; CHECK-LABEL: {{^}}fold_sgpr:
-; CHECK: v_add_i32_e32 v{{[0-9]+}}, s
+; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s
 define void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
 entry:
   %tmp0 = icmp ne i32 %fold, 0
Index: test/CodeGen/AMDGPU/scratch-buffer.ll
===================================================================
--- test/CodeGen/AMDGPU/scratch-buffer.ll
+++ test/CodeGen/AMDGPU/scratch-buffer.ll
@@ -51,7 +51,7 @@
 ; GCN-LABEL: {{^}}legal_offset_fi_offset
 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
-; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
+; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, 0x8000
 ; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
 
 define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
Index: test/CodeGen/AMDGPU/shl_add_constant.ll
===================================================================
--- test/CodeGen/AMDGPU/shl_add_constant.ll
+++ test/CodeGen/AMDGPU/shl_add_constant.ll
@@ -6,7 +6,7 @@
 ; FUNC-LABEL: {{^}}shl_2_add_9_i32:
 ; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 36, [[REG]]
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 36, [[REG]]
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
@@ -20,7 +20,7 @@
 }
 
 ; FUNC-LABEL: {{^}}shl_2_add_9_i32_2_add_uses:
-; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}}
+; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], vcc, 9, {{v[0-9]+}}
 ; SI-DAG: v_lshlrev_b32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
 ; SI-DAG: buffer_store_dword [[ADDREG]]
 ; SI-DAG: buffer_store_dword [[SHLREG]]
@@ -40,7 +40,7 @@
 ; FUNC-LABEL: {{^}}shl_2_add_999_i32:
 ; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]]
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xf9c, [[REG]]
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
Index: test/CodeGen/AMDGPU/shl_add_ptr.ll
===================================================================
--- test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -35,7 +35,7 @@
 ; SI-LABEL: {{^}}load_shl_base_lds_1:
 ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
 ; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
-; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
+; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
 ; SI-DAG: buffer_store_dword [[RESULT]]
 ; SI-DAG: buffer_store_dword [[ADDUSE]]
 ; SI: s_endpgm
Index: test/CodeGen/AMDGPU/sub.ll
===================================================================
--- test/CodeGen/AMDGPU/sub.ll
+++ test/CodeGen/AMDGPU/sub.ll
@@ -7,7 +7,7 @@
 ; FUNC-LABEL: {{^}}test_sub_i32:
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
 define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
   %a = load i32, i32 addrspace(1)* %in
@@ -22,8 +22,8 @@
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
 define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@@ -40,10 +40,10 @@
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
 define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
Index: test/CodeGen/AMDGPU/udivrem.ll
===================================================================
--- test/CodeGen/AMDGPU/udivrem.ll
+++ test/CodeGen/AMDGPU/udivrem.ll
@@ -30,19 +30,19 @@
 ; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
 ; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
 ; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
-; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
 ; SI: v_cndmask_b32_e64
 ; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
-; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
-; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
+; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
+; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
 ; SI: v_cndmask_b32_e64
 ; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
 ; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
+; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
+; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
 ; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
@@ -110,15 +110,15 @@
 ; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
 ; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
 ; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FIRST_RCP_LO]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
 ; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], vcc, [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
@@ -133,15 +133,15 @@
 ; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
 ; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
 ; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[SECOND_RCP_LO]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
 ; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], vcc, [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
@@ -260,15 +260,15 @@
 ; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
 ; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
 ; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FIRST_RCP_LO]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
 ; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[l0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[l0-9]+]], vcc, [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
@@ -283,15 +283,15 @@
 ; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
 ; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
 ; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[SECOND_RCP_LO]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
 ; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], vcc, [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
@@ -306,15 +306,15 @@
 ; SI-DAG: v_rcp_iflag_f32_e32 [[THIRD_RCP:v[0-9]+]]
 ; SI-DAG: v_mul_hi_u32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
 ; SI-DAG: v_mul_lo_i32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[THIRD_RCP_LO]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
-; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], vcc, [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], vcc, [[THIRD_E]], [[THIRD_RCP]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]]
 ; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], [[THIRD_Num_S_Remainder]], {{v[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], vcc, [[THIRD_Num_S_Remainder]], {{v[0-9]+}}
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]]
@@ -329,11 +329,11 @@
 ; SI-DAG: v_rcp_iflag_f32_e32 [[FOURTH_RCP:v[0-9]+]]
 ; SI-DAG: v_mul_hi_u32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
 ; SI-DAG: v_mul_lo_i32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FOURTH_RCP_LO]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
-; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], vcc, [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], vcc, [[FOURTH_E]], [[FOURTH_RCP]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI: s_endpgm
 define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
Index: test/CodeGen/AMDGPU/vop-shrink.ll
===================================================================
--- test/CodeGen/AMDGPU/vop-shrink.ll
+++ test/CodeGen/AMDGPU/vop-shrink.ll
@@ -3,8 +3,8 @@
 
 ; Test that we correctly commute a sub instruction
 ; FUNC-LABEL: {{^}}sub_rev:
-; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, s
-; SI: v_subrev_i32_e32 v{{[0-9]+}}, s
+; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, vcc, s
+; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, s
 
 ; ModuleID = 'vop-shrink.ll'
Index: test/MC/AMDGPU/vop2.s
===================================================================
--- test/MC/AMDGPU/vop2.s
+++ test/MC/AMDGPU/vop2.s
@@ -251,41 +251,85 @@
 // VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
 v_mbcnt_hi_u32_b32 v1, v2, v3
 
-// SICI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
-// VI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
-v_add_i32 v1, v2, v3
+// SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
+// VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+v_add_i32 v1, vcc, v2, v3
 
-// SICI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
-// VI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
-v_add_u32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_i32 v1, s[0:1], v2, v3
 
-// SICI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
-// VI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
-v_sub_i32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_i32_e64 v1, s[0:1], v2, v3
 
-// SICI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
-// VI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
-v_sub_u32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_i32_e64 v1, vcc, v2, v3
 
-// SICI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
-// VI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
-v_subrev_i32 v1, v2, v3
+// SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
+// VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+v_add_u32 v1, vcc, v2, v3
 
-// SICI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
-// VI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
-v_subrev_u32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_u32 v1, s[0:1], v2, v3
 
-// SICI: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
-// VI: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
-v_addc_u32 v1, v2, v3
+// SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
+// VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+v_sub_i32 v1, vcc, v2, v3
 
-// SICI: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
-// VI: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
-v_subb_u32 v1, v2, v3
+// SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
+v_sub_i32 v1, s[0:1], v2, v3
 
-// SICI: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
-// VI: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
-v_subbrev_u32 v1, v2, v3
+// SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
+// VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+v_sub_u32 v1, vcc, v2, v3
+
+// SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
+v_sub_u32 v1, s[0:1], v2, v3
+
+// SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
+// VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+v_subrev_i32 v1, vcc, v2, v3
+
+// SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
+v_subrev_i32 v1, s[0:1], v2, v3
+
+// SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
+// VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+v_subrev_u32 v1, vcc, v2, v3
+
+// SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
+v_subrev_u32 v1, s[0:1], v2, v3
+
+// SICI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
+// VI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
+v_addc_u32 v1, vcc, v2, v3
+
+// SICI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x02,0x00]
+v_addc_u32 v1, s[0:1], v2, v3
+
+// SICI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
+// VI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
+v_subb_u32 v1, vcc, v2, v3
+
+// SICI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0x02,0x00]
+v_subb_u32 v1, s[0:1], v2, v3
+
+// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
+// VI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
+v_subbrev_u32 v1, vcc, v2, v3
+
+// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0x02,0x00]
+v_subbrev_u32 v1, s[0:1], v2, v3
 
 // SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
 // VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]
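As the vop2.s hunk shows, the VI-renamed *_u32 mnemonics (v_add_u32, v_sub_u32, v_subrev_u32) are still accepted on every target and assemble to the same opcodes as the _i32 spellings; only the printed mnemonic stays _i32. A quick manual way to exercise the new operand syntax (a hypothetical invocation; the RUN lines of this test use the same llvm-mc flags):

    echo 'v_add_i32 v1, vcc, v2, v3' | llvm-mc -arch=amdgcn -show-encoding
    // expected: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]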