Index: lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -107,14 +107,15 @@
 int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
   SIEncodingFamily Gen = subtargetEncodingFamily(ST);
+
+  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
+      ST.getGeneration() >= AMDGPUSubtarget::GFX9)
+    Gen = SIEncodingFamily::GFX9;
+
   if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
     Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
                                                       : SIEncodingFamily::SDWA;
-  if ((get(Opcode).TSFlags & SIInstrFlags::F16_ZFILL) != 0 &&
-      ST.getGeneration() >= AMDGPUSubtarget::GFX9)
-    Gen = SIEncodingFamily::GFX9;
-
   int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);

   // -1 means that Opcode is already a native instruction.
Index: lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
===================================================================
--- lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -212,6 +212,9 @@
     Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
     if (Res) break;

+    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
+    if (Res) break;
+
     if (Bytes.size() < 4) break;
     const uint64_t QW = ((uint64_t)eatBytes(Bytes) << 32) | DW;
     Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
Index: lib/Target/AMDGPU/SIDefines.h
===================================================================
--- lib/Target/AMDGPU/SIDefines.h
+++ lib/Target/AMDGPU/SIDefines.h
@@ -69,7 +69,7 @@
   VOPAsmPrefer32Bit = UINT64_C(1) << 41,
   VOP3_OPSEL = UINT64_C(1) << 42,
   maybeAtomic = UINT64_C(1) << 43,
-  F16_ZFILL = UINT64_C(1) << 44,
+  renamedInGFX9 = UINT64_C(1) << 44,

   // Is a clamp on FP type.
   FPClamp = UINT64_C(1) << 45,
Index: lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- lib/Target/AMDGPU/SIInstrFormats.td
+++ lib/Target/AMDGPU/SIInstrFormats.td
@@ -95,9 +95,9 @@
   // Is it possible for this instruction to be atomic?
   field bit maybeAtomic = 0;

-  // This bit indicates that this is a 16-bit instruction which zero-fills
-  // unused bits in dst. Note that new GFX9 opcodes preserve unused bits.
-  field bit F16_ZFILL = 0;
+  // This bit indicates that this is a VI instruction which is renamed
+  // in GFX9. Required for correct mapping from pseudo to MC.
+  field bit renamedInGFX9 = 0;

   // This bit indicates that this has a floating point result type, so
   // the clamp modifier has floating point semantics.
@@ -161,7 +161,7 @@
   let TSFlags{42} = VOP3_OPSEL;
   let TSFlags{43} = maybeAtomic;
-  let TSFlags{44} = F16_ZFILL;
+  let TSFlags{44} = renamedInGFX9;
   let TSFlags{45} = FPClamp;
   let TSFlags{46} = IntClamp;
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1510,47 +1510,3 @@
 def : Int16Med3Pat;
 def : Int16Med3Pat;
 } // End Predicates = [isGFX9]
-
-//============================================================================//
-// Assembler aliases
-//============================================================================//
-
-multiclass NoCarryAlias {
-  def : InstAlias,
-        Requires<[HasAddNoCarryInsts]>;
-
-  def : InstAlias,
-        Requires<[HasAddNoCarryInsts]>;
-
-  def : InstAlias,
-        Requires<[HasAddNoCarryInsts]>;
-
-  def : InstAlias,
-        Requires<[HasAddNoCarryInsts]>;
-}
-
-// gfx9 made a mess of add instruction names. The existing add
-// instructions add _co added to the names, and their old names were
-// repurposed to a version without carry out.
-// TODO: Do we need SubtargetPredicates for MnemonicAliases?
-let Predicates = [HasAddNoCarryInsts] in {
-defm : NoCarryAlias<"v_add_u32", V_ADD_U32_e32_vi, V_ADD_U32_e64_vi,
-                    V_ADD_I32_e32_vi, V_ADD_I32_e64_vi>;
-defm : NoCarryAlias<"v_sub_u32", V_SUB_U32_e32_vi, V_SUB_U32_e64_vi,
-                    V_SUB_I32_e32_vi, V_SUB_I32_e64_vi>;
-defm : NoCarryAlias<"v_subrev_u32",
-                    V_SUBREV_U32_e32_vi, V_SUBREV_U32_e64_vi,
-                    V_SUBREV_I32_e32_vi, V_SUBREV_I32_e64_vi>;
-}
-
-let Predicates = [NotHasAddNoCarryInsts] in {
-def : MnemonicAlias<"v_add_u32", "v_add_i32">;
-def : MnemonicAlias<"v_sub_u32", "v_sub_i32">;
-def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">;
-}
Index: lib/Target/AMDGPU/VOP2Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP2Instructions.td
+++ lib/Target/AMDGPU/VOP2Instructions.td
@@ -143,20 +143,22 @@
                   VOPProfile P,
                   SDPatternOperator node = null_frag,
                   string revOp = opName,
+                  bit GFX9Renamed = 0,
                   bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
-
-  let SchedRW = [Write32Bit, WriteSALU] in {
-    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
-      def _e32 : VOP2_Pseudo ,
-                 Commutable_REV;
-
-      def _sdwa : VOP2_SDWA_Pseudo {
-        let AsmMatchConverter = "cvtSdwaVOP2b";
+  let renamedInGFX9 = GFX9Renamed in {
+    let SchedRW = [Write32Bit, WriteSALU] in {
+      let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
+        def _e32 : VOP2_Pseudo ,
+                   Commutable_REV;
+
+        def _sdwa : VOP2_SDWA_Pseudo {
+          let AsmMatchConverter = "cvtSdwaVOP2b";
+        }
       }
-  }
-  def _e64 : VOP3_Pseudo .ret>,
-             Commutable_REV;
+      def _e64 : VOP3_Pseudo .ret>,
+                 Commutable_REV;
+    }
   }
 }
@@ -278,13 +280,13 @@
   let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                      Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
-                     clampmod:$clamp, omod:$omod,
+                     clampmod:$clamp,
                      dst_sel:$dst_sel, dst_unused:$dst_unused,
                      src0_sel:$src0_sel, src1_sel:$src1_sel);

   let InsDPP = (ins DstRCDPP:$old,
-                    Src0Mod:$src0_modifiers, Src0DPP:$src0,
-                    Src1Mod:$src1_modifiers, Src1DPP:$src1,
+                    Src0DPP:$src0,
+                    Src1DPP:$src1,
                     dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                     bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
   let HasExt = 1;
@@ -370,12 +372,12 @@
 // V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI,
 // but the VI instructions behave the same as the SI versions.
-defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32>;
-defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32>;
-defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32">;
-defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1>;
-defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1>;
-defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
+defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_i32", 1>;
+defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>;
+defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>;
+defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
+defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
+defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;

 let SubtargetPredicate = HasAddNoCarryInsts in {
@@ -660,8 +662,8 @@
 // VI
 //===----------------------------------------------------------------------===//

-class VOP2_DPP op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> :
-  VOP_DPP {
+class VOP2_DPP op, VOP2_Pseudo ps, string OpName = ps.OpName, VOPProfile P = ps.Pfl> :
+  VOP_DPP {
   let Defs = ps.Defs;
   let Uses = ps.Uses;
   let SchedRW = ps.SchedRW;
@@ -743,6 +745,86 @@
   def _dpp : VOP2_DPP(NAME#"_e32")>;
 }

+let AssemblerPredicates = [isVIOnly] in {
+
+multiclass VOP2be_Real_e32e64_vi_only op, string OpName, string AsmName> {
+  def _e32_vi :
+    VOP2_Real(OpName#"_e32"), SIEncodingFamily.VI>,
+    VOP2e(OpName#"_e32").Pfl> {
+      VOP2_Pseudo ps = !cast(OpName#"_e32");
+      let AsmString = AsmName # ps.AsmOperands;
+      let DecoderNamespace = "VI";
+    }
+  def _e64_vi :
+    VOP3_Real(OpName#"_e64"), SIEncodingFamily.VI>,
+    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast(OpName#"_e64").Pfl> {
+      VOP3_Pseudo ps = !cast(OpName#"_e64");
+      let AsmString = AsmName # ps.AsmOperands;
+      let DecoderNamespace = "VI";
+    }
+  def _sdwa_vi :
+    VOP_SDWA_Real (OpName#"_sdwa")>,
+    VOP2_SDWAe (OpName#"_sdwa").Pfl> {
+      VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa");
+      let AsmString = AsmName # ps.AsmOperands;
+    }
+  def _dpp :
+    VOP2_DPP(OpName#"_e32"), AsmName>;
+}
+}
+
+let AssemblerPredicates = [isGFX9] in {
+
+multiclass VOP2be_Real_e32e64_gfx9 op, string OpName, string AsmName> {
+  def _e32_gfx9 :
+    VOP2_Real(OpName#"_e32"), SIEncodingFamily.GFX9>,
+    VOP2e(OpName#"_e32").Pfl> {
+      VOP2_Pseudo ps = !cast(OpName#"_e32");
+      let AsmString = AsmName # ps.AsmOperands;
+      let DecoderNamespace = "GFX9";
+    }
+  def _e64_gfx9 :
+    VOP3_Real(OpName#"_e64"), SIEncodingFamily.GFX9>,
+    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast(OpName#"_e64").Pfl> {
+      VOP3_Pseudo ps = !cast(OpName#"_e64");
+      let AsmString = AsmName # ps.AsmOperands;
+      let DecoderNamespace = "GFX9";
+    }
+  def _sdwa_gfx9 :
+    VOP_SDWA9_Real (OpName#"_sdwa")>,
+    VOP2_SDWA9Ae (OpName#"_sdwa").Pfl> {
+      VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa");
+      let AsmString = AsmName # ps.AsmOperands;
+    }
+  def _dpp_gfx9 :
+    VOP2_DPP(OpName#"_e32"), AsmName> {
+      let DecoderNamespace = "SDWA9";
+    }
+}
+
+multiclass VOP2_Real_e32e64_gfx9 op> {
+  def _e32_gfx9 :
+    VOP2_Real(NAME#"_e32"), SIEncodingFamily.GFX9>,
+    VOP2e(NAME#"_e32").Pfl>{
+      let DecoderNamespace = "GFX9";
+    }
+  def _e64_gfx9 :
+    VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX9>,
+    VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl> {
+      let DecoderNamespace = "GFX9";
+    }
+  def _sdwa_gfx9 :
+    VOP_SDWA9_Real (NAME#"_sdwa")>,
+    VOP2_SDWA9Ae (NAME#"_sdwa").Pfl> {
+    }
+  def _dpp_gfx9 :
+    VOP2_DPP(NAME#"_e32")> {
+      let DecoderNamespace = "SDWA9";
+    }
+}
+
+} // AssemblerPredicates = [isGFX9]
+
 multiclass VOP2_Real_e32e64_vi op> :
   Base_VOP2_Real_e32e64_vi, VOP2_SDWA_Real, VOP2_SDWA9_Real {
   // For now left dpp only for asm/dasm
@@ -775,12 +857,24 @@
 defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>;
 defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>;
 defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>;
-defm V_ADD_I32 : VOP2be_Real_e32e64_vi <0x19>;
-defm V_SUB_I32 : VOP2be_Real_e32e64_vi <0x1a>;
-defm V_SUBREV_I32 : VOP2be_Real_e32e64_vi <0x1b>;
-defm V_ADDC_U32 : VOP2be_Real_e32e64_vi <0x1c>;
-defm V_SUBB_U32 : VOP2be_Real_e32e64_vi <0x1d>;
-defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi <0x1e>;
+
+defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_I32", "v_add_u32">;
+defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_I32", "v_sub_u32">;
+defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_I32", "v_subrev_u32">;
+defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">;
+defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">;
+defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;
+
+defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_I32", "v_add_co_u32">;
+defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_I32", "v_sub_co_u32">;
+defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_I32", "v_subrev_co_u32">;
+defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">;
+defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">;
+defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">;
+
+defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>;
+defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>;
+defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;

 defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
 defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;
@@ -840,9 +934,3 @@
 def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;

 } // End SubtargetPredicate = isVI
-
-let SubtargetPredicate = HasAddNoCarryInsts in {
-defm V_ADD_U32 : VOP2_Real_e32e64_vi <0x34>;
-defm V_SUB_U32 : VOP2_Real_e32e64_vi <0x35>;
-defm V_SUBREV_U32 : VOP2_Real_e32e64_vi <0x36>;
-}
Index: lib/Target/AMDGPU/VOP3Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP3Instructions.td
+++ lib/Target/AMDGPU/VOP3Instructions.td
@@ -408,7 +408,7 @@
 let SubtargetPredicate = Has16BitInsts in {

-let F16_ZFILL = 1 in {
+let renamedInGFX9 = 1 in {
 def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile, AMDGPUdiv_fixup>;
 }
 let SubtargetPredicate = isGFX9 in {
@@ -417,7 +417,7 @@
 let isCommutable = 1 in {

-let F16_ZFILL = 1 in {
+let renamedInGFX9 = 1 in {
 def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile, fmad>;
 def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>;
 def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>;
@@ -504,6 +504,9 @@
 def V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile>;
 def V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile>;
+
+def V_ADD_I32_gfx9 : VOP3Inst <"v_add_i32_gfx9", VOP3_Profile>;
+def V_SUB_I32_gfx9 : VOP3Inst <"v_sub_i32_gfx9", VOP3_Profile>;
 } // End SubtargetPredicate = isGFX9

 //===----------------------------------------------------------------------===//
@@ -701,6 +704,14 @@
   }
 }

+multiclass VOP3_Real_gfx9 op, string AsmName> {
+  def _vi : VOP3_Real(NAME), SIEncodingFamily.GFX9>,
+            VOP3e_vi (NAME).Pfl> {
+              VOP3_Pseudo ps = !cast(NAME);
+              let AsmString = AsmName # ps.AsmOperands;
+            }
+}
+
 } // End AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9"

 defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
@@ -767,6 +778,9 @@
 defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">;
 defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">;

+defm V_ADD_I32_gfx9 : VOP3_Real_gfx9 <0x29c, "v_add_i32">;
+defm V_SUB_I32_gfx9 : VOP3_Real_gfx9 <0x29d, "v_sub_i32">;
+
 defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_vi <0x270>;
 defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_vi <0x271>;
 defm V_INTERP_MOV_F32_e64 : VOP3Interp_Real_vi <0x272>;
Index: test/CodeGen/AMDGPU/add.v2i16.ll
===================================================================
--- test/CodeGen/AMDGPU/add.v2i16.ll
+++ test/CodeGen/AMDGPU/add.v2i16.ll
@@ -52,8 +52,8 @@
 ; GCN-LABEL: {{^}}s_test_add_v2i16_kernarg:
 ; GFX9: v_pk_add_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}

-; VI: v_add_i32
-; VI: v_add_i32_sdwa
+; VI: v_add_u32
+; VI: v_add_u32_sdwa
 define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 {
   %add = add <2 x i16> %a, %b
   store <2 x i16> %add, <2 x i16> addrspace(1)* %out
Index: test/CodeGen/AMDGPU/amdgcn.private-memory.ll
===================================================================
--- test/CodeGen/AMDGPU/amdgcn.private-memory.ll
+++ test/CodeGen/AMDGPU/amdgcn.private-memory.ll
@@ -13,7 +13,7 @@
 ; GCN-LABEL: {{^}}work_item_info:
 ; GCN-NOT: v0
-; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, v0, v{{[0-9]+}}
+; GCN: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v0, v{{[0-9]+}}
 ; GCN: buffer_store_dword [[RESULT]]
 define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
 entry:
Index: test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll
===================================================================
--- test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll
+++ test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll
@@ -3,7 +3,7 @@
 ; GCN-LABEL: {{^}}shader_cc:
-; GCN: v_add_i32_e32 v0, vcc, s8, v0
+; GCN: v_add_{{[iu]}}32_e32 v0, vcc, s8, v0
 define amdgpu_cs float @shader_cc(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
   %vi = bitcast float %v to i32
   %x = add i32 %vi, %w
Index: test/CodeGen/AMDGPU/bfe-combine.ll
===================================================================
--- test/CodeGen/AMDGPU/bfe-combine.ll
+++ test/CodeGen/AMDGPU/bfe-combine.ll
@@ -9,8 +9,8 @@
 ; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
 ; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 6, v{{[0-9]+}}
 ; CI: v_and_b32_e32 v[[ADDRLO:[0-9]+]], 0x3fc, v[[SHR]]
-; VI: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
-; VI-SDWA: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
+; VI: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
+; VI-SDWA: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
 ; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
 define amdgpu_kernel void @bfe_combine8(i32 addrspace(1)* nocapture %arg, i32 %x) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
@@ -29,11 +29,11 @@
 ; VI-SDWA: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 15
 ; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE1:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; VI-SDWA: v_lshlrev_b64 v{{\[}}[[ADDRBASE:[0-9]+]]:{{[^\]+}}], 2, v{{\[}}[[ADDRBASE1]]:{{[^\]+}}]
-; VI-SDWA: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
+; VI-SDWA: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
 ; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 1, v{{[0-9]+}}
 ; CI: v_and_b32_e32 v[[AND:[0-9]+]], 0x7fff8000, v[[SHR]]
 ; CI: v_lshl_b64 v{{\[}}[[ADDRLO:[0-9]+]]:{{[^\]+}}], v{{\[}}[[AND]]:{{[^\]+}}], 2
-; VI: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
+; VI: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]]
 ; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
 define amdgpu_kernel void @bfe_combine16(i32 addrspace(1)* nocapture %arg, i32 %x) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2
Index: test/CodeGen/AMDGPU/bfe-patterns.ll
===================================================================
--- test/CodeGen/AMDGPU/bfe-patterns.ll
+++ test/CodeGen/AMDGPU/bfe-patterns.ll
@@ -22,7 +22,7 @@
 ; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32:
 ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
-; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
+; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
 ; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
 ; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]
@@ -100,7 +100,7 @@
 ; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32:
 ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
-; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
+; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
 ; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
 ; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]
Index: test/CodeGen/AMDGPU/byval-frame-setup.ll
===================================================================
--- test/CodeGen/AMDGPU/byval-frame-setup.ll
+++ test/CodeGen/AMDGPU/byval-frame-setup.ll
@@ -38,11 +38,11 @@
 ; GCN-DAG: s_add_u32 s32, s32, 0xb00{{$}}
 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN: v_add_i32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
+; GCN: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
 ; GCN: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}

 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
-; GCN: v_add_i32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]
+; GCN: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]
 ; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:20{{$}}

 ; GCN: s_swappc_b64
Index: test/CodeGen/AMDGPU/ctlz.ll
===================================================================
--- test/CodeGen/AMDGPU/ctlz.ll
+++ test/CodeGen/AMDGPU/ctlz.ll
@@ -112,7 +112,7 @@
 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 32, [[FFBH]], vcc
 ; SI: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, 24, [[SELECT]]
-; VI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, -16, [[SELECT]]
+; VI: v_add_u32_e32 [[RESULT:v[0-9]+]], vcc, -16, [[SELECT]]
 ; GCN: buffer_store_byte [[RESULT]],
 ; GCN: s_endpgm
 define amdgpu_kernel void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
@@ -151,7 +151,7 @@
 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
 ; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, v[[HI]]
 ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
-; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
+; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
 ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
 ; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], vcc
 ; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]]
Index: test/CodeGen/AMDGPU/ctlz_zero_undef.ll
===================================================================
--- test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -126,7 +126,7 @@
 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
 ; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, v[[HI]]
 ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
-; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
+; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
 ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
 ; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]]
 ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI:[0-9]+]]{{\]}}
Index: test/CodeGen/AMDGPU/ctpop64.ll
===================================================================
--- test/CodeGen/AMDGPU/ctpop64.ll
+++ test/CodeGen/AMDGPU/ctpop64.ll
@@ -188,7 +188,7 @@
 ; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT2:v[0-9]+]], v[[VAL0]], 0
 ; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT3:v[0-9]+]], v{{[0-9]+}}, [[MIDRESULT2]]

-; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, [[MIDRESULT1]], [[MIDRESULT2]]
+; GCN: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, [[MIDRESULT1]], [[MIDRESULT2]]
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
Index: test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
===================================================================
--- test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -169,7 +169,7 @@
 ; GCN-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
 ; GCN: {{buffer|flat}}_load_dword [[LOADREG:v[0-9]+]],
-; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
+; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
 ; GCN-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
 ; GCN: buffer_store_dword [[CONV]],
 define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
Index: test/CodeGen/AMDGPU/ds-combine-large-stride.ll
===================================================================
--- test/CodeGen/AMDGPU/ds-combine-large-stride.ll
+++ test/CodeGen/AMDGPU/ds-combine-large-stride.ll
@@ -4,12 +4,12 @@
 ; GCN-LABEL: ds_read32_combine_stride_400:
 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
-; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
@@ -46,12 +46,12 @@
 ; GCN-LABEL: ds_read32_combine_stride_400_back:
 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
-; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100
 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100
 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100
@@ -124,12 +124,12 @@
 ; GCN-LABEL: ds_read32_combine_stride_8192_shifted:
 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
-; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32
 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:32
 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:32
@@ -160,8 +160,8 @@
 ; GCN-LABEL: ds_read64_combine_stride_400:
 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
-; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50
 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:100 offset1:150
 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:200 offset1:250
@@ -198,12 +198,12 @@
 ; GCN-LABEL: ds_read64_combine_stride_8192_shifted:
 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
-; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16
 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:16
 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:16
@@ -234,12 +234,12 @@
 ; GCN-LABEL: ds_write32_combine_stride_400:
 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
-; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
 ; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
 ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
 ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
@@ -267,12 +267,12 @@
 ; GCN-LABEL: ds_write32_combine_stride_400_back:
 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
-; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]]
 ; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
 ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
 ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100
@@ -327,12 +327,12 @@
 ; GCN-LABEL: ds_write32_combine_stride_8192_shifted:
 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
-; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]]
 ; GCN-DAG: ds_write2st64_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
 ; GCN-DAG: ds_write2st64_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
 ; GCN-DAG: ds_write2st64_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32
@@ -356,8 +356,8 @@
 ; GCN-LABEL: ds_write64_combine_stride_400:
 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
-; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]]
 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50
 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:100 offset1:150
 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:200 offset1:250
@@ -385,12 +385,12 @@
 ; GCN-LABEL: ds_write64_combine_stride_8192_shifted:
 ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0
 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]]
-; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
-; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
-; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
+; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]]
+; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]]
 ; GCN-DAG: ds_write2st64_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
 ; GCN-DAG: ds_write2st64_b64 [[B2]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
 ; GCN-DAG: ds_write2st64_b64 [[B3]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16
Index: test/CodeGen/AMDGPU/fence-barrier.ll
===================================================================
--- test/CodeGen/AMDGPU/fence-barrier.ll
+++ test/CodeGen/AMDGPU/fence-barrier.ll
@@ -53,7 +53,7 @@
 }

 ; GCN-LABEL: {{^}}test_global
-; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x888, v{{[0-9]+}}
+; GCN: v_add_u32_e32 v{{[0-9]+}}, vcc, 0x888, v{{[0-9]+}}
 ; GCN: flat_store_dword
 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
 ; GCN-NEXT: s_barrier
Index: test/CodeGen/AMDGPU/function-args.ll
===================================================================
--- test/CodeGen/AMDGPU/function-args.ll
+++ test/CodeGen/AMDGPU/function-args.ll
@@ -24,7 +24,7 @@
 ; GCN-LABEL: {{^}}void_func_i1_signext:
 ; GCN: s_waitcnt
-; GCN-NEXT: v_add_i32_e32 v0, vcc, 12, v0
+; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
 ; GCN-NOT: v0
 ; GCN: buffer_store_dword v0, off
 define void @void_func_i1_signext(i1 signext %arg0) #0 {
@@ -60,7 +60,7 @@
 ; GCN-LABEL: {{^}}void_func_i8_zeroext:
 ; GCN-NOT: and_b32
-; GCN: v_add_i32_e32 v0, vcc, 12, v0
+; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
 define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
   %ext = zext i8 %arg0 to i32
   %add = add i32 %ext, 12
@@ -70,7 +70,7 @@
 ; GCN-LABEL: {{^}}void_func_i8_signext:
 ; GCN-NOT: v_bfe_i32
-; GCN: v_add_i32_e32 v0, vcc, 12, v0
+; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
 define void @void_func_i8_signext(i8 signext %arg0) #0 {
   %ext = sext i8 %arg0 to i32
   %add = add i32 %ext, 12
@@ -87,7 +87,7 @@
 ; GCN-LABEL: {{^}}void_func_i16_zeroext:
 ; GCN-NOT: v0
-; GCN: v_add_i32_e32 v0, vcc, 12, v0
+; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
 define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
   %ext = zext i16 %arg0 to i32
   %add = add i32 %ext, 12
@@ -97,7 +97,7 @@
 ; GCN-LABEL: {{^}}void_func_i16_signext:
 ; GCN-NOT: v0
-; GCN: v_add_i32_e32 v0, vcc, 12, v0
+; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0
 define void @void_func_i16_signext(i16 signext %arg0) #0 {
   %ext = sext i16 %arg0 to i32
   %add = add i32 %ext, 12
Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
@@ -103,7 +103,7 @@
 }

 ;CHECK-LABEL: {{^}}buffer_load_negative_offset:
-;CHECK: v_add_i32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0
+;CHECK: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0
 ;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen
 define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) {
 main_body:
Index: test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
@@ -51,7 +51,7 @@
 ; GCN: s_bfm_b64 exec, s1, 0
 ; GCN: s_cmp_eq_u32 s1, 64
 ; GCN: s_cmov_b64 exec, -1
-; GCN: v_add_i32_e32 v0, vcc, s0, v0
+; GCN: v_add_co_u32_e32 v0, vcc, s0, v0
 define amdgpu_ps float @reuse_input(i32 inreg %count, i32 %a) {
 main_body:
   call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
@@ -65,7 +65,7 @@
 ; GCN: s_bfm_b64 exec, s1, 0
 ; GCN: s_cmp_eq_u32 s1, 64
 ; GCN: s_cmov_b64 exec, -1
-; GCN: v_add_i32_e32 v0, vcc, s0, v0
+; GCN: v_add_co_u32_e32 v0, vcc, s0, v0
 define amdgpu_ps float @reuse_input2(i32 inreg %count, i32 %a) {
 main_body:
   %s = add i32 %a, %count
Index: test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
@@ -396,7 +396,7 @@
 ; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
 ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
 ; GCN: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
-; GCN: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
+; GCN: v_add_{{[iu]}}32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
 ; GCN: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
 ; GCN: buffer_store_dword [[TMP2]]
 define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
Index: test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
@@ -65,7 +65,7 @@
 ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
 ; GCN: buffer_load_dword
-; GCN: v_add_i32
+; GCN: v_add_{{[iu]}}32
 ; GCN-NEXT: v_and_b32_e32
 ; FIXME: Should be using s_add_i32
 ; GCN-NOT: {{[^@]}}bfe
@@ -81,7 +81,7 @@
 ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
 ; GCN: buffer_load_dword
-; GCN: v_add_i32
+; GCN: v_add_{{[iu]}}32
 ; GCN-NEXT: v_and_b32_e32
 ; GCN-NOT: {{[^@]}}bfe
 ; GCN: s_endpgm
@@ -96,7 +96,7 @@
 ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
 ; GCN: buffer_load_dword
-; GCN: v_add_i32
+; GCN: v_add_{{[iu]}}32
 ; GCN: bfe
 ; GCN: s_endpgm
 define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
@@ -110,7 +110,7 @@
 ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
 ; GCN: buffer_load_dword
-; GCN: v_add_i32
+; GCN: v_add_{{[iu]}}32
 ; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
 ; GCN-NEXT: bfe
 ; GCN: s_endpgm
@@ -125,7 +125,7 @@
 ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
 ; GCN: buffer_load_dword
-; GCN: v_add_i32
+; GCN: v_add_{{[iu]}}32
 ; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
 ; GCN-NEXT: bfe
 ; GCN: s_endpgm
@@ -140,7 +140,7 @@
 ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
 ; GCN: buffer_load_dword
-; GCN: v_add_i32
+; GCN: v_add_{{[iu]}}32
 ; GCN-NEXT: bfe
 ; GCN: s_endpgm
 define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
Index: test/CodeGen/AMDGPU/mul.ll
===================================================================
--- test/CodeGen/AMDGPU/mul.ll
+++ test/CodeGen/AMDGPU/mul.ll
@@ -242,7 +242,7 @@
 ; SI-DAG: v_mul_hi_u32
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_mul_lo_i32
-; SI: v_add_i32_e32
+; SI: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_mul_hi_u32
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_mul_hi_u32
Index: test/CodeGen/AMDGPU/pack.v2f16.ll
===================================================================
--- test/CodeGen/AMDGPU/pack.v2f16.ll
+++ test/CodeGen/AMDGPU/pack.v2f16.ll
@@ -87,7 +87,7 @@
 ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]]
 ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]]

-; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
+; GFX9: v_add_{{[_coiu]*}}32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
 define amdgpu_kernel void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
Index: test/CodeGen/AMDGPU/pack.v2i16.ll
===================================================================
--- test/CodeGen/AMDGPU/pack.v2i16.ll
+++ test/CodeGen/AMDGPU/pack.v2i16.ll
@@ -81,7 +81,7 @@
 ; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]]
 ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]]

-; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
+; GFX9: v_add_{{[_coiu]*}}32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]]
 define amdgpu_kernel void @v_pack_v2i16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
Index: test/CodeGen/AMDGPU/rotl.i64.ll
===================================================================
--- test/CodeGen/AMDGPU/rotl.i64.ll
+++ test/CodeGen/AMDGPU/rotl.i64.ll
@@ -20,7 +20,7 @@
 ; BOTH-LABEL: {{^}}v_rotl_i64:
 ; SI-DAG: v_lshl_b64
 ; VI-DAG: v_lshlrev_b64
-; BOTH-DAG: v_sub_i32
+; BOTH-DAG: v_sub_{{[iu]}}32
 ; SI: v_lshr_b64
 ; VI: v_lshrrev_b64
 ; BOTH: v_or_b32
Index: test/CodeGen/AMDGPU/rotr.i64.ll
===================================================================
--- test/CodeGen/AMDGPU/rotr.i64.ll
+++ test/CodeGen/AMDGPU/rotr.i64.ll
@@ -17,7 +17,7 @@
 }

 ; BOTH-LABEL: {{^}}v_rotr_i64:
-; BOTH-DAG: v_sub_i32
+; BOTH-DAG: v_sub_{{[iu]}}32
 ; SI-DAG: v_lshr_b64
 ; SI-DAG: v_lshl_b64
 ; VI-DAG: v_lshrrev_b64
Index: test/CodeGen/AMDGPU/saddo.ll
===================================================================
--- test/CodeGen/AMDGPU/saddo.ll
+++ test/CodeGen/AMDGPU/saddo.ll
@@ -49,7 +49,7 @@
 }

 ; FUNC-LABEL: {{^}}v_saddo_i64:
-; SI: v_add_i32
+; SI: v_add_{{[iu]}}32
 ; SI: v_addc_u32
 define amdgpu_kernel void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
   %a = load i64, i64 addrspace(1)* %aptr, align 4
Index: test/CodeGen/AMDGPU/scratch-buffer.ll
===================================================================
--- test/CodeGen/AMDGPU/scratch-buffer.ll
+++ test/CodeGen/AMDGPU/scratch-buffer.ll
@@ -50,7 +50,7 @@
 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
 ; This constant isn't folded, because it has multiple uses.
 ; GCN-DAG: v_mov_b32_e32 [[K8000:v[0-9]+]], 0x8004
-; GCN-DAG: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, [[K8000]]
+; GCN-DAG: v_add_{{[iu]}}32_e32 [[OFFSET:v[0-9]+]], vcc, [[K8000]]
 ; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
 define amdgpu_kernel void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
Index: test/CodeGen/AMDGPU/sdiv.ll
===================================================================
--- test/CodeGen/AMDGPU/sdiv.ll
+++ test/CodeGen/AMDGPU/sdiv.ll
@@ -37,10 +37,10 @@
 ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
 ; SI-DAG: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
 ; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]]
-; SI: v_add_i32
+; SI: v_add_{{[iu]}}32
 ; SI: v_lshrrev_b32
 ; SI: v_ashrrev_i32
-; SI: v_add_i32
+; SI: v_add_{{[iu]}}32
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define amdgpu_kernel void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
Index: test/CodeGen/AMDGPU/sdwa-peephole.ll
===================================================================
--- test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ test/CodeGen/AMDGPU/sdwa-peephole.ll
@@ -4,10 +4,10 @@
 ; GCN-LABEL: {{^}}add_shr_i32:
 ; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}}
-; NOSDWA: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
-; NOSDWA-NOT: v_add_i32_sdwa
+; NOSDWA: v_add_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
+; NOSDWA-NOT: v_add_{{[_cou]*}}32_sdwa

-; SDWA: v_add_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA: v_add_{{[_cou]*}}32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

 define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %a = load i32, i32 addrspace(1)* %in, align 4
@@ -19,10 +19,10 @@
 ; GCN-LABEL: {{^}}sub_shr_i32:
 ; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}}
-; NOSDWA: v_subrev_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
-; NOSDWA-NOT: v_subrev_i32_sdwa
+; NOSDWA: v_subrev_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]]
+; NOSDWA-NOT: v_subrev_{{[_cou]*}}32_sdwa

-; SDWA: v_subrev_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA: v_subrev_{{[_cou]*}}32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

 define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %a = load i32, i32 addrspace(1)* %in, align 4
@@ -426,9 +426,9 @@
 }

 ; GCN-LABEL: {{^}}add_bb_v2i16:
-; NOSDWA-NOT: v_add_i32_sdwa
+; NOSDWA-NOT: v_add_{{[_cou]*}}32_sdwa

-; VI: v_add_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI: v_add_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1

 ; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
Index: test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll
===================================================================
--- test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll
+++ test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll
@@ -5,7 +5,7 @@
 ; used in an REG_SEQUENCE that also needs to be handled.

 ; SI-LABEL: {{^}}test_dup_operands:
-; SI: v_add_i32_e32
+; SI: v_add_{{[iu]}}32_e32
 define amdgpu_kernel void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
   %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
   %lo = extractelement <2 x i32> %a, i32 0
Index: test/CodeGen/AMDGPU/sgpr-copy.ll
===================================================================
--- test/CodeGen/AMDGPU/sgpr-copy.ll
+++ test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -321,7 +321,7 @@
 ; CHECK: s_cmp_eq_u32
 ; CHECK: s_cbranch_scc0 [[END:BB[0-9]+_[0-9]+]]

-; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
+; CHECK: v_add_{{[iu]}}32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}

 ; [[END]]:
 ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
Index: test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
===================================================================
--- test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
+++ test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
@@ -5,9 +5,9 @@
 ; CHECK-LABEL: {{^}}add_const_offset:
 ; CHECK: v_lshlrev_b32_e32 v[[SHL:[0-9]+]], 4, v0
-; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 0xc80, v[[SHL]]
+; CHECK: v_add_u32_e32 v[[ADD:[0-9]+]], vcc, 0xc80, v[[SHL]]
 ; CHECK-NOT: v_lshl
-; CHECK: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADD]]
+; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADD]]
 ; CHECK: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
 define amdgpu_kernel void @add_const_offset(i32 addrspace(1)* nocapture %arg) {
 bb:
@@ -24,7 +24,7 @@
 ; CHECK: v_lshlrev_b32_e32 v[[SHL:[0-9]+]], 4, v0
 ; CHECK: v_or_b32_e32 v[[OR:[0-9]+]], 0x1000, v[[SHL]]
 ; CHECK-NOT: v_lshl
-; CHECK: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[OR]]
+; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[OR]]
 ; CHECK: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]:
 define amdgpu_kernel void @or_const_offset(i32 addrspace(1)* nocapture %arg) {
 bb:
Index: test/CodeGen/AMDGPU/shl_add_ptr.ll
===================================================================
--- test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -35,7 +35,7 @@
 ; SI-LABEL: {{^}}load_shl_base_lds_1:
 ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
 ; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
-; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
+; SI: v_add_{{[iu]}}32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
 ; SI-DAG: buffer_store_dword [[RESULT]]
 ; SI-DAG: buffer_store_dword [[ADDUSE]]
 ; SI: s_endpgm
Index: test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
===================================================================
--- test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -6,7 +6,7 @@
 ; GCN-LABEL: {{^}}v_test_i32_x_sub_64:
 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
+; GCN: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
 define amdgpu_kernel void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
@@ -21,8 +21,8 @@
 ; GCN-LABEL: {{^}}v_test_i32_x_sub_64_multi_use:
 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword [[Y:v[0-9]+]]
-; GCN-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
-; GCN-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
+; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
+; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
 define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
@@ -39,7 +39,7 @@
 ; GCN-LABEL: {{^}}v_test_i32_64_sub_x:
 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
+; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
 define amdgpu_kernel void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
@@ -53,7 +53,7 @@
 ; GCN-LABEL: {{^}}v_test_i32_x_sub_65:
 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 0xffffffbf, [[X]]
+; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffbf, [[X]]
 define amdgpu_kernel void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
@@ -67,7 +67,7 @@
 ; GCN-LABEL: {{^}}v_test_i32_65_sub_x:
 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0x41, [[X]]
+; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0x41, [[X]]
 define amdgpu_kernel void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
@@ -81,7 +81,7 @@
 ; GCN-LABEL: {{^}}v_test_i32_x_sub_neg16:
 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 16, [[X]]
+; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 16, [[X]]
 define amdgpu_kernel void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
@@ -95,7 +95,7 @@
 ; GCN-LABEL: {{^}}v_test_i32_neg16_sub_x:
 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, -16, [[X]]
+; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, -16, [[X]]
 define amdgpu_kernel void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
@@ -109,7 +109,7 @@
 ; GCN-LABEL: {{^}}v_test_i32_x_sub_neg17:
 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 17, [[X]]
+; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 17, [[X]]
 define amdgpu_kernel void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
@@ -123,7 +123,7 @@
 ; GCN-LABEL: {{^}}v_test_i32_neg17_sub_x:
 ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0xffffffef, [[X]]
+; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffef, [[X]]
 define amdgpu_kernel void @v_test_i32_neg17_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
Index: test/CodeGen/AMDGPU/sibling-call.ll
===================================================================
--- test/CodeGen/AMDGPU/sibling-call.ll
+++ test/CodeGen/AMDGPU/sibling-call.ll
@@ -4,7 +4,7 @@
 ; GCN-LABEL: {{^}}i32_fastcc_i32_i32:
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0
+; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0
 ; GCN-NEXT: s_setpc_b64
 define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
   %add0 = add i32 %arg0, %arg1
@@ -13,7 +13,7 @@
 ; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object:
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN: v_add_i32_e32 v0, vcc, v1, v
+; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v
 ; GCN: s_mov_b32 s5, s32
 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24
 ; GCN: s_waitcnt vmcnt(0)
@@ -83,7 +83,7 @@
 ; GCN-NEXT: s_mov_b32 s5, s32
 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0
+; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0
 ; GCN-NEXT: s_setpc_b64 s[30:31]
 define fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32* byval align 4 %arg1) #1 {
   %arg1.load = load i32, i32* %arg1, align 4
@@ -122,9 +122,9 @@
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-DAG: buffer_load_dword [[LOAD_0:v[0-9]+]], off, s[0:3], s5 offset:4
 ; GCN-DAG: buffer_load_dword [[LOAD_1:v[0-9]+]], off, s[0:3], s5 offset:8
-; GCN-DAG: v_add_i32_e32 v0, vcc, v1, v0
-; GCN: v_add_i32_e32 v0, vcc, [[LOAD_0]], v0
-; GCN: v_add_i32_e32 v0, vcc, [[LOAD_1]], v0
+; GCN-DAG: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0
+; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_0]], v0
+; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_1]], v0
 ; GCN-NEXT: s_setpc_b64
 define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 {
   %val_firststack = extractvalue [32 x i32] %large, 30
Index: test/CodeGen/AMDGPU/sminmax.ll
===================================================================
--- test/CodeGen/AMDGPU/sminmax.ll
+++ test/CodeGen/AMDGPU/sminmax.ll
@@ -17,9 +17,9 @@
 }

 ; FUNC-LABEL: {{^}}v_abs_i32:
-; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
+; GCN: v_sub_{{[iu]}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
 ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]]
-; GCN: v_add_i32
+; GCN: v_add_{{[iu]}}32

 ; EG: MAX_INT
 define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
@@ -33,7 +33,7 @@
 }

 ; GCN-LABEL: {{^}}v_abs_i32_repeat_user:
-; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
+; GCN: v_sub_{{[iu]}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
 ; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
 ; GCN: v_mul_lo_i32 v{{[0-9]+}}, [[MAX]], [[MAX]]
 define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
@@ -68,14 +68,14 @@
 }

 ; FUNC-LABEL: {{^}}v_abs_v2i32:
-; GCN-DAG: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
-; GCN-DAG: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
+; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
+; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]

 ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]]
 ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]]

-; GCN: v_add_i32
-; GCN: v_add_i32 +; GCN: v_add_{{[iu]}}32 +; GCN: v_add_{{[iu]}}32 ; EG: MAX_INT ; EG: MAX_INT @@ -127,20 +127,20 @@ } ; FUNC-LABEL: {{^}}v_abs_v4i32: -; GCN-DAG: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]] -; GCN-DAG: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]] -; GCN-DAG: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]] -; GCN-DAG: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]] +; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]] +; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]] +; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]] +; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]] -; GCN: v_add_i32 -; GCN: v_add_i32 -; GCN: v_add_i32 -; GCN: v_add_i32 +; GCN: v_add_{{[iu]}}32 +; GCN: v_add_{{[iu]}}32 +; GCN: v_add_{{[iu]}}32 +; GCN: v_add_{{[iu]}}32 ; EG: MAX_INT ; EG: MAX_INT Index: test/CodeGen/AMDGPU/sminmax.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/sminmax.v2i16.ll +++ test/CodeGen/AMDGPU/sminmax.v2i16.ll @@ -8,12 +8,12 @@ ; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]] ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 -; VI: v_sub_i32_e32 -; VI-DAG: v_sub_i32_e32 +; VI: v_sub_u32_e32 +; VI-DAG: v_sub_u32_e32 ; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 ; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 -; VI: v_add_i32_e32 -; VI: v_add_i32_e32 +; VI: v_add_u32_e32 +; VI: v_add_u32_e32 ; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; CI: v_sub_i32_e32 Index: test/CodeGen/AMDGPU/srem.ll =================================================================== --- test/CodeGen/AMDGPU/srem.ll +++ test/CodeGen/AMDGPU/srem.ll @@ -22,7 +22,7 @@ ; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x92492493 ; SI: v_mul_hi_i32 {{v[0-9]+}}, {{v[0-9]+}}, [[MAGIC]] ; SI: v_mul_lo_i32 -; SI: v_sub_i32 +; SI: v_sub_{{[iu]}}32 ; SI: s_endpgm define amdgpu_kernel void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %num = load i32, i32 addrspace(1) * %in Index: test/CodeGen/AMDGPU/ssubo.ll =================================================================== --- test/CodeGen/AMDGPU/ssubo.ll +++ test/CodeGen/AMDGPU/ssubo.ll @@ -51,7 +51,7 @@ } ; FUNC-LABEL: {{^}}v_ssubo_i64: -; SI: v_sub_i32_e32 +; SI: v_sub_{{[iu]}}32_e32 ; SI: v_subb_u32_e32 define amdgpu_kernel void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %a = load i64, i64 addrspace(1)* %aptr, align 4 Index: test/CodeGen/AMDGPU/store-hi16.ll =================================================================== --- test/CodeGen/AMDGPU/store-hi16.ll +++ test/CodeGen/AMDGPU/store-hi16.ll @@ -98,7 +98,7 @@ ; GCN: s_waitcnt ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094 -; VI-DAG: v_add_i32_e32 +; VI-DAG: v_add_u32_e32 ; VI-DAG: v_addc_u32_e32 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2 @@ -119,7 +119,7 @@ ; GCN: s_waitcnt ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off 
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}
-; VI-DAG: v_add_i32_e32
+; VI-DAG: v_add_u32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
@@ -139,7 +139,7 @@
; GCN: s_waitcnt
; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095
-; VI-DAG: v_add_i32_e32
+; VI-DAG: v_add_u32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
; VI: flat_store_byte v[0:1], v{{[0-9]$}}
@@ -160,7 +160,7 @@
; GCN: s_waitcnt
; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095
-; VI-DAG: v_add_i32_e32
+; VI-DAG: v_add_u32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
@@ -272,7 +272,7 @@
; GCN: s_waitcnt
; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}
-; VI-DAG: v_add_i32_e32
+; VI-DAG: v_add_u32_e32
; VI-DAG: v_addc_u32_e32
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
; VI: flat_store_short v[0:1], v2{{$}}
@@ -289,8 +289,9 @@
; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset:
; GCN: s_waitcnt
-; GCN: v_add_i32_e32
-; GCN: v_addc_u32_e32
+; GCN: v_add_{{[_cou]*}}32_e32
+; VI: v_addc_u32_e32
+; GFX9: v_addc_co_u32_e32
; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
; VI: flat_store_short v[0:1], v2{{$}}
@@ -310,7 +311,7 @@
; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
-; VI-DAG: v_add_i32_e32
+; VI-DAG: v_add_u32_e32
; VI-DAG: v_addc_u32_e32
; VI: flat_store_byte v[0:1], v2{{$}}
; GCN-NEXT: s_waitcnt
@@ -327,8 +328,9 @@
; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset:
; GCN: s_waitcnt
-; GCN-DAG: v_add_i32_e32
-; GCN-DAG: v_addc_u32_e32
+; GCN-DAG: v_add_{{[_cou]*}}32_e32
+; VI-DAG: v_addc_u32_e32
+; GFX9-DAG: v_addc_co_u32_e32
; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
Index: test/CodeGen/AMDGPU/sub.v2i16.ll
===================================================================
--- test/CodeGen/AMDGPU/sub.v2i16.ll
+++ test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -49,8 +49,8 @@
; GCN-LABEL: {{^}}s_test_sub_v2i16_kernarg:
; GFX9: v_pk_sub_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-; VI: v_subrev_i32_e32
-; VI: v_subrev_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_subrev_u32_e32
+; VI: v_subrev_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
define amdgpu_kernel void @s_test_sub_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 {
%add = sub <2 x i16> %a, %b
store <2 x i16> %add, <2 x i16> addrspace(1)* %out
Index: test/CodeGen/AMDGPU/uaddo.ll
===================================================================
--- test/CodeGen/AMDGPU/uaddo.ll
+++ test/CodeGen/AMDGPU/uaddo.ll
@@ -22,7 +22,7 @@
; FIXME: Could do scalar
; FUNC-LABEL: {{^}}s_uaddo_i32:
-; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; EG: ADDC_UINT
@@ -37,7 +37,7 @@
}
; FUNC-LABEL: {{^}}v_uaddo_i32:
-; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; EG: ADDC_UINT
@@ -58,7 +58,7 @@
}
; FUNC-LABEL: {{^}}v_uaddo_i32_novcc:
-; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; EG: ADDC_UINT
@@ -95,7 +95,7 @@
}
; FUNC-LABEL: {{^}}v_uaddo_i64:
-; GCN: v_add_i32
+; GCN: v_add_{{[iu]}}32
; GCN: v_addc_u32
; EG: ADDC_UINT
Index: test/CodeGen/AMDGPU/udivrem.ll
===================================================================
--- test/CodeGen/AMDGPU/udivrem.ll
+++ test/CodeGen/AMDGPU/udivrem.ll
@@ -30,24 +30,24 @@
; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
-; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
+; SI-DAG: v_sub_{{[iu]}}32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
; SI: v_cndmask_b32_e64
; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
-; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
-; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
+; SI-DAG: v_add_{{[iu]}}32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
+; SI-DAG: v_subrev_{{[iu]}}32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
; SI: v_cndmask_b32_e64
; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
-; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
+; SI-DAG: v_add_{{[iu]}}32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
+; SI-DAG: v_sub_{{[iu]}}32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Remainder_S_Den:v[0-9]+]],
; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_add_{{[iu]}}32_e32 [[Remainder_A_Den:v[0-9]+]],
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI: s_endpgm
@@ -114,47 +114,47 @@
; SI-DAG: v_rcp_iflag_f32_e32
; SI-DAG: v_mul_hi_u32
; SI-DAG: v_mul_lo_i32
-; SI-DAG: v_sub_i32_e32
+; SI-DAG: v_sub_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32
; SI-DAG: v_mul_lo_i32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_rcp_iflag_f32_e32
; SI-DAG: v_mul_hi_u32
; SI-DAG: v_mul_lo_i32
-; SI-DAG: v_sub_i32_e32
+; SI-DAG: v_sub_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32
; SI-DAG: v_mul_lo_i32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI: s_endpgm
@@ -264,80 +264,80 @@
; SI-DAG: v_rcp_iflag_f32_e32
; SI-DAG: v_mul_hi_u32
; SI-DAG: v_mul_lo_i32
-; SI-DAG: v_sub_i32_e32
+; SI-DAG: v_sub_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32
; SI-DAG: v_mul_lo_i32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_rcp_iflag_f32_e32
; SI-DAG: v_mul_hi_u32
; SI-DAG: v_mul_lo_i32
-; SI-DAG: v_sub_i32_e32
+; SI-DAG: v_sub_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32
; SI-DAG: v_mul_lo_i32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_rcp_iflag_f32_e32
; SI-DAG: v_mul_hi_u32
; SI-DAG: v_mul_lo_i32
-; SI-DAG: v_sub_i32_e32
+; SI-DAG: v_sub_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32
; SI-DAG: v_mul_lo_i32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_rcp_iflag_f32_e32
; SI-DAG: v_mul_hi_u32
; SI-DAG: v_mul_lo_i32
-; SI-DAG: v_sub_i32_e32
+; SI-DAG: v_sub_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32
-; SI-DAG: v_add_i32_e32
-; SI-DAG: v_subrev_i32_e32
+; SI-DAG: v_add_{{[iu]}}32_e32
+; SI-DAG: v_subrev_{{[iu]}}32_e32
; SI-DAG: v_cndmask_b32_e64
; SI: s_endpgm
define amdgpu_kernel void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
Index: test/CodeGen/AMDGPU/uint_to_fp.i64.ll
===================================================================
--- test/CodeGen/AMDGPU/uint_to_fp.i64.ll
+++ test/CodeGen/AMDGPU/uint_to_fp.i64.ll
@@ -21,7 +21,7 @@
; GCN-DAG: v_cmp_eq_u64
; GCN-DAG: v_cmp_gt_u64
-; GCN: v_add_i32_e32 [[VR:v[0-9]+]]
+; GCN: v_add_{{[iu]}}32_e32 [[VR:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[VR_F16:v[0-9]+]], [[VR]]
; GCN: {{buffer|flat}}_store_short {{.*}}[[VR_F16]]
define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
@@ -52,7 +52,7 @@
; GCN-DAG: v_cmp_eq_u64
; GCN-DAG: v_cmp_gt_u64
-; GCN: v_add_i32_e32 [[VR:v[0-9]+]]
+; GCN: v_add_{{[iu]}}32_e32 [[VR:v[0-9]+]]
; GCN: {{buffer|flat}}_store_dword {{.*}}[[VR]]
define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
Index: test/CodeGen/AMDGPU/uniform-cfg.ll
===================================================================
--- test/CodeGen/AMDGPU/uniform-cfg.ll
+++ test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -560,7 +560,7 @@
}
; GCN-LABEL: {{^}}move_to_valu_vgpr_operand_phi:
-; GCN: v_add_i32_e32
+; GCN: v_add_{{[iu]}}32_e32
; GCN: ds_write_b32
define void @move_to_valu_vgpr_operand_phi(i32 addrspace(3)* %out) {
bb0:
Index: test/CodeGen/AMDGPU/urem.ll
===================================================================
--- test/CodeGen/AMDGPU/urem.ll
+++ test/CodeGen/AMDGPU/urem.ll
@@ -21,9 +21,9 @@
; FUNC-LABEL: {{^}}test_urem_i32_7:
; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x24924925
; SI: v_mul_hi_u32 [[MAGIC]], {{v[0-9]+}}
-; SI: v_subrev_i32
+; SI: v_subrev_{{[iu]}}32
; SI: v_mul_lo_i32
-; SI: v_sub_i32
+; SI: v_sub_{{[iu]}}32
; SI: buffer_store_dword
; SI: s_endpgm
define amdgpu_kernel void @test_urem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
Index: test/CodeGen/AMDGPU/usubo.ll
===================================================================
--- test/CodeGen/AMDGPU/usubo.ll
+++ test/CodeGen/AMDGPU/usubo.ll
@@ -22,7 +22,7 @@
; FIXME: Could do scalar
; FUNC-LABEL: {{^}}s_usubo_i32:
-; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; EG-DAG: SUBB_UINT
@@ -37,7 +37,7 @@
}
; FUNC-LABEL: {{^}}v_usubo_i32:
-; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; EG-DAG: SUBB_UINT
@@ -58,7 +58,7 @@
}
; FUNC-LABEL: {{^}}v_usubo_i32_novcc:
-; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
; EG-DAG: SUBB_UINT
@@ -97,7 +97,7 @@
}
; FUNC-LABEL: {{^}}v_usubo_i64:
-; GCN: v_sub_i32
+; GCN: v_sub_{{[iu]}}32
; GCN: v_subb_u32
; EG-DAG: SUBB_UINT
Index: test/CodeGen/AMDGPU/vop-shrink.ll
===================================================================
--- test/CodeGen/AMDGPU/vop-shrink.ll
+++ test/CodeGen/AMDGPU/vop-shrink.ll
@@ -4,7 +4,7 @@
; Test that we correctly commute a sub instruction
; FUNC-LABEL: {{^}}sub_rev:
; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, vcc, s
-; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, s
+; SI: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s
; ModuleID = 'vop-shrink.ll'
Index: test/CodeGen/AMDGPU/wqm.ll
===================================================================
--- test/CodeGen/AMDGPU/wqm.ll
+++ test/CodeGen/AMDGPU/wqm.ll
@@ -169,7 +169,7 @@
;CHECK: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
;CHECK: buffer_load_dword
;CHECK: buffer_load_dword
-;CHECK: v_add_i32_e32
+;CHECK: v_add_{{[iu]}}32_e32
define amdgpu_ps float @test_wwm2(i32 inreg %idx0, i32 inreg %idx1) {
main_body:
%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
@@ -303,7 +303,7 @@
;CHECK: v_mov_b32_e32
;CHECK: s_not_b64 exec, exec
;CHECK: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
-;CHECK: v_add_i32_e32
+;CHECK: v_add_{{[iu]}}32_e32
define amdgpu_ps void @test_set_inactive1(i32 inreg %idx) {
main_body:
%src = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
Index: test/MC/AMDGPU/add-sub-no-carry.s
===================================================================
--- test/MC/AMDGPU/add-sub-no-carry.s
+++ test/MC/AMDGPU/add-sub-no-carry.s
@@ -1,104 +1,94 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GCN,GFX9 %s
-// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefixes=GCN,VI %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GFX9 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s
// FIXME: pre-gfx9 errors should be more useful
-// FIXME: These should parse to VOP2 encoding
v_add_u32 v1, v2, v3
-// GFX9: v_add_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0x07,0x02,0x00]
-// ERR-SICIVI: :15: error: invalid operand for instruction
+// GFX9: v_add_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x68]
+// ERR-SICIVI: error: instruction not supported on this GPU
v_add_u32 v1, v2, s1
// GFX9: v_add_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0x03,0x00,0x00]
-// ERR-SICIVI: :15: error: invalid operand for instruction
+// ERR-SICIVI: error: instruction not supported on this GPU
v_add_u32 v1, s1, v2
-// GFX9: v_add_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x34,0xd1,0x01,0x04,0x02,0x00]
-// ERR-SICIVI: :15: error: invalid operand for instruction
+// GFX9: v_add_u32_e32 v1, s1, v2 ; encoding: [0x01,0x04,0x02,0x68]
+// ERR-SICIVI: error: instruction not supported on this GPU
v_add_u32 v1, 4.0, v2
-// GFX9: v_add_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x34,0xd1,0xf6,0x04,0x02,0x00]
-// ERR-SICIVI: :15: error: invalid operand for instruction
+// GFX9: v_add_u32_e32 v1, 4.0, v2 ; encoding: [0xf6,0x04,0x02,0x68]
+// ERR-SICIVI: error: instruction not supported on this GPU
v_add_u32 v1, v2, 4.0
// GFX9: v_add_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0xed,0x01,0x00]
-// ERR-SICIVI: :15: error: invalid operand for instruction
+// ERR-SICIVI: error: instruction not supported on this GPU
v_add_u32_e32 v1, v2, v3
// GFX9: v_add_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x68]
-// ERR-SICIVI: :19: error: invalid operand for instruction
+// ERR-SICIVI: error: instruction not supported on this GPU
v_add_u32_e32 v1, s1, v3
// GFX9: v_add_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x68]
-// ERR-SICIVI: :19: error: invalid operand for instruction
+// ERR-SICIVI: error: instruction not supported on this GPU
v_sub_u32 v1, v2, v3
-// GFX9: v_sub_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0x07,0x02,0x00]
-// ERR-SICIVI: :15: error: invalid operand for instruction
+// GFX9: v_sub_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6a]
+// ERR-SICIVI: error: instruction not supported on this GPU
v_sub_u32 v1, v2, s1
// GFX9: v_sub_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0x03,0x00,0x00]
-// ERR-SICIVI: :15: error: invalid operand for instruction
+// ERR-SICIVI: error: instruction not supported on this GPU
v_sub_u32 v1, s1, v2
-// GFX9: v_sub_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x35,0xd1,0x01,0x04,0x02,0x00]
-// ERR-SICIVI: :15: error: invalid operand for instruction
+// GFX9: v_sub_u32_e32 v1, s1, v2 ; encoding: [0x01,0x04,0x02,0x6a]
+// ERR-SICIVI: error: instruction not supported on this GPU
v_sub_u32 v1, 4.0, v2
-// GFX9: v_sub_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x35,0xd1,0xf6,0x04,0x02,0x00]
-// ERR-SICIVI: :15: error: invalid operand for instruction
+// GFX9: v_sub_u32_e32 v1, 4.0, v2 ; encoding: [0xf6,0x04,0x02,0x6a]
+// ERR-SICIVI: error: instruction not supported on this GPU
v_sub_u32 v1, v2, 4.0
// GFX9: v_sub_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0xed,0x01,0x00]
-// ERR-SICIVI: :15: error: invalid operand for instruction
+// ERR-SICIVI: error: instruction not supported on this GPU
v_sub_u32_e32 v1, v2, v3
// GFX9: v_sub_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6a]
-// ERR-SICIVI: :19: error: invalid operand for instruction
+// ERR-SICIVI: error: instruction not supported on this GPU
v_sub_u32_e32 v1, s1, v3
// GFX9: v_sub_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x6a]
-// ERR-SICIVI: :19: error: invalid operand for instruction
+// ERR-SICIVI: error: instruction not supported on this GPU
v_subrev_u32 v1, v2, v3
-// GFX9: v_subrev_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0x07,0x02,0x00]
-// ERR-SICIVI: :18: error: invalid operand for instruction
+// GFX9: v_subrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6c]
+// ERR-SICIVI: error: instruction not supported on this GPU
v_subrev_u32 v1, v2, s1
// GFX9: v_subrev_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0x03,0x00,0x00]
-// ERR-SICIVI: :18: error: invalid operand for instruction
+// ERR-SICIVI: error: instruction not supported on this GPU
v_subrev_u32 v1, s1, v2
-// GFX9: v_subrev_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x36,0xd1,0x01,0x04,0x02,0x00]
-// ERR-SICIVI: :18: error: invalid operand for instruction
+// GFX9: v_subrev_u32_e32 v1, s1, v2 ; encoding: [0x01,0x04,0x02,0x6c]
+// ERR-SICIVI: error: instruction not supported on this GPU
v_subrev_u32 v1, 4.0, v2
-// GFX9: v_subrev_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x36,0xd1,0xf6,0x04,0x02,0x00]
-// ERR-SICIVI: :18: error: invalid operand for instruction
+// GFX9: v_subrev_u32_e32 v1, 4.0, v2 ; encoding: [0xf6,0x04,0x02,0x6c]
+// ERR-SICIVI: error: instruction not supported on this GPU
v_subrev_u32 v1, v2, 4.0
// GFX9: v_subrev_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0xed,0x01,0x00]
-// ERR-SICIVI: :18: error: invalid operand for instruction
+// ERR-SICIVI: error: instruction not supported on this GPU
v_subrev_u32_e32 v1, v2, v3
// GFX9: v_subrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6c]
-// ERR-SICIVI: :22: error: invalid operand for instruction
+// ERR-SICIVI: error: instruction not supported on this GPU
v_subrev_u32_e32 v1, s1, v3
// GFX9: v_subrev_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x6c]
-// ERR-SICIVI: :22: error: invalid operand for instruction
-
-
-
-v_add_u32 v1, vcc, v2, v3
-// GCN: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
-v_add_u32 v1, s[0:1], v2, v3
-// GCN: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+// ERR-SICIVI: error: instruction not supported on this GPU
Index: test/MC/AMDGPU/regression/bug28413.s
===================================================================
--- test/MC/AMDGPU/regression/bug28413.s
+++ test/MC/AMDGPU/regression/bug28413.s
@@ -24,11 +24,3 @@
v_mov_b32_e32 v0, 3.125
// GCN: v_mov_b32_e32 v0, 0x40480000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x00,0x48,0x40]
-
-v_add_i32 v0, vcc, 0.5, v0
-// SICI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x4a]
-// VI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x32]
-
-v_add_i32 v0, vcc, 3.125, v0
-// SICI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x4a,0x00,0x00,0x48,0x40]
-// VI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x32,0x00,0x00,0x48,0x40]
\ No newline at end of file
Index: test/MC/AMDGPU/vop2.s
===================================================================
--- test/MC/AMDGPU/vop2.s
+++ test/MC/AMDGPU/vop2.s
@@ -95,11 +95,11 @@
v_mul_i32_i24_e64 v1, 3, s3
// SICI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x4a]
-// VI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x32]
+// NOVI: error: instruction not supported on this GPU
v_add_i32_e32 v0, vcc, 0.5, v0
// SICI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x4a,0x00,0x00,0x48,0x40]
-// VI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x32,0x00,0x00,0x48,0x40]
+// NOVI: error: instruction not supported on this GPU
v_add_i32_e32 v0, vcc, 3.125, v0
//===----------------------------------------------------------------------===//
@@ -271,59 +271,59 @@
v_mbcnt_hi_u32_b32_e64 v1, v2, v3
// SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
-// VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+// NOVI: error: instruction not supported on this GPU
v_add_i32_e32 v1, vcc, v2, v3
// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+// NOVI: error: instruction not supported on this GPU
v_add_i32 v1, s[0:1], v2, v3
// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+// NOVI: error: instruction not supported on this GPU
v_add_i32_e64 v1, s[0:1], v2, v3
// SICI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x4a,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
+// NOVI: error: instruction not supported on this GPU
v_add_i32_e64 v1, vcc, v2, v3
-// SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
-// VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+// NOSICI: error: instruction not supported on this GPU
+// VI: v_add_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
v_add_u32 v1, vcc, v2, v3
-// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+// NOSICI: error: instruction not supported on this GPU
+// VI: v_add_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
v_add_u32 v1, s[0:1], v2, v3
// SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
-// VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+// NOVI: error: instruction not supported on this GPU
v_sub_i32 v1, vcc, v2, v3
// SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
+// NOVI: error: instruction not supported on this GPU
v_sub_i32 v1, s[0:1], v2, v3
-// SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
-// VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+// NOSICI: error: instruction not supported on this GPU
+// VI: v_sub_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
v_sub_u32 v1, vcc, v2, v3
-// SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
+// NOSICI: error: instruction not supported on this GPU
+// VI: v_sub_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
v_sub_u32 v1, s[0:1], v2, v3
// SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
-// VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+// NOVI: error: instruction not supported on this GPU
v_subrev_i32 v1, vcc, v2, v3
// SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
+// NOVI: error: instruction not supported on this GPU
v_subrev_i32 v1, s[0:1], v2, v3
-// SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
-// VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+// NOSICI: error: instruction not supported on this GPU
+// VI: v_subrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
v_subrev_u32 v1, vcc, v2, v3
-// SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
-// VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
+// NOSICI: error: instruction not supported on this GPU
+// VI: v_subrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
v_subrev_u32 v1, s[0:1], v2, v3
// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50]
Index: test/MC/AMDGPU/vop_dpp.s
===================================================================
--- test/MC/AMDGPU/vop_dpp.s
+++ test/MC/AMDGPU/vop_dpp.s
@@ -509,29 +509,65 @@
v_ldexp_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
// NOSICI: error:
-// VI9: v_add_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
-v_add_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+// NOGFX9: error:
+// VI: v_add_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
+v_add_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
// NOSICI: error:
-// VI9: v_sub_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
-v_sub_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+// NOGFX9: error:
+// VI: v_sub_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
+v_sub_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
// NOSICI: error:
-// VI9: v_subrev_i32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
-v_subrev_i32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+// NOGFX9: error:
+// VI: v_subrev_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
+v_subrev_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
// NOSICI: error:
-// VI9: v_addc_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
+// NOGFX9: error:
+// VI: v_addc_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
v_addc_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
// NOSICI: error:
-// VI9: v_subb_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
+// NOGFX9: error:
+// VI: v_subb_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
v_subb_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
// NOSICI: error:
-// VI9: v_subbrev_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
+// NOGFX9: error:
+// VI: v_subbrev_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
v_subbrev_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+// NOSICI: error:
+// NOVI: error:
+// GFX9: v_add_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x32,0x02,0x01,0x09,0xa1]
+v_add_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+
+// NOSICI: error:
+// NOVI: error:
+// GFX9: v_sub_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x34,0x02,0x01,0x09,0xa1]
+v_sub_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+
+// NOSICI: error:
+// NOVI: error:
+// GFX9: v_subrev_co_u32_dpp v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x36,0x02,0x01,0x09,0xa1]
+v_subrev_co_u32 v1, vcc, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+
+// NOSICI: error:
+// NOVI: error:
+// GFX9: v_addc_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x38,0x02,0x01,0x09,0xa1]
+v_addc_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+
+// NOSICI: error:
+// NOVI: error:
+// GFX9: v_subb_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3a,0x02,0x01,0x09,0xa1]
+v_subb_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+
+// NOSICI: error:
+// NOVI: error:
+// GFX9: v_subbrev_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1]
+v_subbrev_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+
//===----------------------------------------------------------------------===//
// Check that immideates and scalar regs are not supported
//===----------------------------------------------------------------------===//
Index: test/MC/AMDGPU/vop_sdwa.s
===================================================================
--- test/MC/AMDGPU/vop_sdwa.s
+++ test/MC/AMDGPU/vop_sdwa.s
@@ -497,29 +497,65 @@
v_ldexp_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
// NOSICI: error:
-// GFX89: v_add_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02]
-v_add_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// NOGFX9: error:
+// VI: v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02]
+v_add_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
// NOSICI: error:
-// GFX89: v_sub_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02]
-v_sub_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// NOGFX9: error:
+// VI: v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02]
+v_sub_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
// NOSICI: error:
-// GFX89: v_subrev_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02]
-v_subrev_i32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// NOGFX9: error:
+// VI: v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02]
+v_subrev_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
// NOSICI: error:
-// GFX89: v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02]
+// NOGFX9: error:
+// VI: v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02]
v_addc_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
// NOSICI: error:
-// GFX89: v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02]
+// NOGFX9: error:
+// VI: v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02]
v_subb_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
// NOSICI: error:
-// GFX89: v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
+// NOGFX9: error:
+// VI: v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+// NOSICI: error:
+// NOVI: error:
+// GFX9: v_add_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x32,0x02,0x06,0x05,0x02]
+v_add_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+
+// NOSICI: error:
+// NOVI: error:
+// GFX9: v_sub_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x34,0x02,0x06,0x05,0x02]
+v_sub_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+
+// NOSICI: error:
+// NOVI: error:
+// GFX9: v_subrev_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x36,0x02,0x06,0x05,0x02]
+v_subrev_co_u32_sdwa v1, vcc, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+
+// NOSICI: error:
+// NOVI: error:
+// GFX9: v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x38,0x02,0x06,0x05,0x02]
+v_addc_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+
+// NOSICI: error:
+// NOVI: error:
+// GFX9: v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3a,0x02,0x06,0x05,0x02]
+v_subb_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+
+// NOSICI: error:
+// NOVI: error:
+// GFX9: v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02]
+v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2
+
//===----------------------------------------------------------------------===//
// Check VOPC opcodes
//===----------------------------------------------------------------------===//
Index: test/MC/Disassembler/AMDGPU/vop2_vi.txt
===================================================================
--- test/MC/Disassembler/AMDGPU/vop2_vi.txt
+++ test/MC/Disassembler/AMDGPU/vop2_vi.txt
@@ -93,46 +93,46 @@
# VI: v_mbcnt_hi_u32_b32 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
0x01 0x00 0x8d 0xd2 0x02 0x07 0x02 0x00
-# VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+# VI: v_add_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
0x02 0x07 0x02 0x32
-# VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+# VI: v_add_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
0x01 0x00 0x19 0xd1 0x02 0x07 0x02 0x00
-# VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+# VI: v_add_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
0x01 0x00 0x19 0xd1 0x02 0x07 0x02 0x00
-# VI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
+# VI: v_add_u32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
0x01 0x6a 0x19 0xd1 0x02 0x07 0x02 0x00
-# VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+# VI: v_add_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
0x02 0x07 0x02 0x32
-# VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+# VI: v_add_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
0x01 0x00 0x19 0xd1 0x02 0x07 0x02 0x00
-# VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+# VI: v_sub_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
0x02 0x07 0x02 0x34
-# VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
+# VI: v_sub_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
0x01 0x00 0x1a 0xd1 0x02 0x07 0x02 0x00
-# VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+# VI: v_sub_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
0x02 0x07 0x02 0x34
-# VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
+# VI: v_sub_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
0x01 0x00 0x1a 0xd1 0x02 0x07 0x02 0x00
-# VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+# VI: v_subrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
0x02 0x07 0x02 0x36
-# VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
+# VI: v_subrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
0x01 0x00 0x1b 0xd1 0x02 0x07 0x02 0x00
-# VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+# VI: v_subrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
0x02 0x07 0x02 0x36
-# VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
+# VI: v_subrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
0x01 0x00 0x1b 0xd1 0x02 0x07 0x02 0x00
# VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38]
Index: test/Object/AMDGPU/objdump.s
===================================================================
--- test/Object/AMDGPU/objdump.s
+++ test/Object/AMDGPU/objdump.s
@@ -12,7 +12,7 @@
s_waitcnt lgkmcnt(0)
s_add_u32 s0, s7, s0
BB0:
- v_add_i32_e32 v1, vcc, s0, v1
+ v_add_u32_e32 v1, vcc, s0, v1
BB1:
s_movk_i32 s0, 0x483
v_cmp_ge_i32_e32 vcc, s0, v0
@@ -37,7 +37,7 @@
v_ashrrev_i32_e32 v77, 31, v76
v_lshlrev_b64 v[10:11], 2, v[76:77]
s_waitcnt lgkmcnt(0)
- v_add_i32_e32 v10, vcc, s8, v10
+ v_add_u32_e32 v10, vcc, s8, v10
v_mov_b32_e32 v6, s9
v_addc_u32_e32 v11, vcc, v6, v11, vcc
flat_load_dword v0, v[10:11]
@@ -53,7 +53,7 @@
// CHECK: s_waitcnt lgkmcnt(0) // 000000000110: BF8C007F
// CHECK: s_add_u32 s0, s7, s0 // 000000000114: 80000007
// CHECK: BB0:
-// CHECK: v_add_i32_e32 v1, vcc, s0, v1 // 000000000118: 32020200
+// CHECK: v_add_u32_e32 v1, vcc, s0, v1 // 000000000118: 32020200
// CHECK: BB1:
// CHECK: s_movk_i32 s0, 0x483 // 00000000011C: B0000483
// CHECK: v_cmp_ge_i32_e32 vcc, s0, v0 // 000000000120: 7D8C0000
@@ -74,7 +74,7 @@
// CHECK: v_ashrrev_i32_e32 v77, 31, v76 // 000000000250: 229A989F
// CHECK: v_lshlrev_b64 v[10:11], 2, v[76:77] // 000000000254: D28F000A 00029882
// CHECK: s_waitcnt lgkmcnt(0) // 00000000025C: BF8C007F
-// CHECK: v_add_i32_e32 v10, vcc, s8, v10 // 000000000260: 32141408
+// CHECK: v_add_u32_e32 v10, vcc, s8, v10 // 000000000260: 32141408
// CHECK: v_mov_b32_e32 v6, s9 // 000000000264: 7E0C0209
// CHECK: v_addc_u32_e32 v11, vcc, v6, v11, vcc // 000000000268: 38161706
// CHECK: flat_load_dword v0, v[10:11] // 00000000026C: DC500000 0000000A
Index: test/tools/llvm-objdump/AMDGPU/source-lines.ll
===================================================================
--- test/tools/llvm-objdump/AMDGPU/source-lines.ll
+++ test/tools/llvm-objdump/AMDGPU/source-lines.ll
@@ -12,7 +12,7 @@
; LINE: ; {{.*}}source-lines.cl:3
; LINE: v_mov_b32_e32 v{{[0-9]+}}, 0x888
; LINE: ; {{.*}}source-lines.cl:4
-; LINE: v_add_i32_e32
+; LINE: v_add_u32_e32
; LINE: ; {{.*}}source-lines.cl:5
; LINE: flat_store_dword
; Epilogue.
@@ -28,7 +28,7 @@
; SOURCE: ; int var1 = 0x888;
; SOURCE: v_mov_b32_e32 v{{[0-9]+}}, 0x888
; SOURCE: ; int var2 = var0 + var1;
-; SOURCE: v_add_i32_e32
+; SOURCE: v_add_u32_e32
; SOURCE: ; *Out = var2;
; SOURCE: flat_store_dword
; Epilogue.