Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -215,6 +215,10 @@ (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)); } + bool isSCSrc64() const { + return (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)) || isInlineImm(); + } + bool isVCSrc32() const { return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); } Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -990,7 +990,14 @@ // Returns the register class to use for sources of VOP3 instructions for the // given VT. class getVOP3SrcForVT { - RegisterOperand ret = !if(!eq(VT.Size, 64), VCSrc_64, VCSrc_32); + RegisterOperand ret = + !if(!eq(VT.Size, 64), + VCSrc_64, + !if(!eq(VT.Value, i1.Value), + SCSrc_64, + VCSrc_32 + ) + ); } // Returns 1 if the source arguments have modifiers, 0 if they do not. @@ -1070,7 +1077,6 @@ "$dst, "#src0#src1#src2#"$clamp"#"$omod"); } - class VOPProfile _ArgVT> { field list ArgVT = _ArgVT; @@ -1132,17 +1138,26 @@ def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>; def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; -class VOP2b_Profile : VOPProfile<[vt, vt, vt, untyped]> { +// Write out to vcc or arbitrary SGPR. +def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { let Asm32 = "$dst, vcc, $src0, $src1"; let Asm64 = "$dst, $sdst, $src0, $src1"; let Outs32 = (outs DstRC:$dst); let Outs64 = (outs DstRC:$dst, SReg_64:$sdst); } -def VOP2b_I32_I1_I32_I32 : VOP2b_Profile; - -def VOP2b_I32_I1_I32_I32_VCC : VOP2b_Profile { +// Write out to vcc or arbitrary SGPR and read in from vcc or +// arbitrary SGPR. +def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { let Src0RC32 = VCSrc_32; + let Asm32 = "$dst, vcc, $src0, $src1, vcc"; + let Asm64 = "$dst, $sdst, $src0, $src1, $src2"; + let Outs32 = (outs DstRC:$dst); + let Outs64 = (outs DstRC:$dst, SReg_64:$sdst); + + // Suppress src2 implied by type since the 32-bit encoding uses an + // implicit VCC use. + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); } // VOPC instructions are a special case because for the 32-bit @@ -1429,32 +1444,19 @@ // No VI instruction. This class is for SI only. } -// XXX - Is v_div_scale_{f32|f64} only available in vop3b without -// option of implicit vcc use? -multiclass VOP3b_2_m pattern, string opName, string revOp, - bit HasMods = 1, bit UseFullOp = 0> { - def "" : VOP3_Pseudo , - VOP2_REV; - - def _si : VOP3b_Real_si , - VOP3DisableFields<1, 0, HasMods>; - - def _vi : VOP3b_Real_vi , - VOP3DisableFields<1, 0, HasMods>; -} - -multiclass VOP3b_3_m pattern, string opName, string revOp, - bit HasMods = 1, bit UseFullOp = 0> { +// Two operand VOP3b instruction that may have a 3rd SGPR bool operand +// instead of an implicit VCC as in the VOP2b format. +multiclass VOP3b_2_3_m pattern, string opName, string revOp, + bit HasMods = 1, bit useSGPRInput = 0, + bit UseFullOp = 0> { def "" : VOP3_Pseudo ; - def _si : VOP3b_Real_si , - VOP3DisableFields<1, 1, HasMods>; + VOP3DisableFields<1, useSGPRInput, HasMods>; def _vi : VOP3b_Real_vi , - VOP3DisableFields<1, 1, HasMods>; + VOP3DisableFields<1, useSGPRInput, HasMods>; } multiclass VOP3_C_m pat32, dag ins64, string asm64, list pat64, - string revOp, bit HasMods> { + string revOp, bit HasMods, bit useSGPRInput> { - defm _e32 : VOP2_m ; + let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in { + defm _e32 : VOP2_m ; + } - defm _e64 : VOP3b_2_m ; } @@ -1596,7 +1600,7 @@ i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), - revOp, P.HasModifiers + revOp, P.HasModifiers, !eq(P.NumSrcArgs, 3) >; // A VOP2 instruction that is VOP3-only on VI. @@ -1847,14 +1851,14 @@ multiclass VOP3b_Helper pattern> : - VOP3b_3_m < + VOP3b_2_3_m < op, (outs vrc:$vdst, SReg_64:$sdst), (ins InputModsNoDefault:$src0_modifiers, arc:$src0, InputModsNoDefault:$src1_modifiers, arc:$src1, InputModsNoDefault:$src2_modifiers, arc:$src2, ClampMod:$clamp, omod:$omod), opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern, - opName, opName, 1, 1 + opName, opName, 1, 0, 1 >; multiclass VOP3b_64 pattern> : Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -1507,7 +1507,7 @@ defm V_MADAK_F32 : VOP2MADK , "v_madak_f32">; } // End isCommutable = 1 -let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC +let isCommutable = 1 in { // No patterns so that the scalar instructions are always selected. // The scalar versions will be replaced with vector when needed later. @@ -1522,19 +1522,17 @@ VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32" >; -let Uses = [VCC] in { // Carry-in comes from VCC defm V_ADDC_U32 : VOP2bInst , "v_addc_u32", - VOP2b_I32_I1_I32_I32_VCC + VOP2b_I32_I1_I32_I32_I1 >; defm V_SUBB_U32 : VOP2bInst , "v_subb_u32", - VOP2b_I32_I1_I32_I32_VCC + VOP2b_I32_I1_I32_I32_I1 >; defm V_SUBBREV_U32 : VOP2bInst , "v_subbrev_u32", - VOP2b_I32_I1_I32_I32_VCC, null_frag, "v_subb_u32" + VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32" >; -} // End Uses = [VCC] -} // End isCommutable = 1, Defs = [VCC] +} // End isCommutable = 1 defm V_READLANE_B32 : VOP2SI_3VI_m < vop3 <0x001, 0x289>, Index: lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.td +++ lib/Target/AMDGPU/SIRegisterInfo.td @@ -281,3 +281,13 @@ let OperandType = "OPERAND_REG_INLINE_C"; let ParserMatchClass = RegImmMatcher<"VCSrc64">; } + +//===----------------------------------------------------------------------===// +// SCSrc_* Operands with an SGPR or an inline constant +//===----------------------------------------------------------------------===// + +def SCSrc_64 : RegisterOperand { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_INLINE_C"; + let ParserMatchClass = RegImmMatcher<"SCSrc64">; +} Index: test/MC/AMDGPU/vop2-err.s =================================================================== --- test/MC/AMDGPU/vop2-err.s +++ test/MC/AMDGPU/vop2-err.s @@ -35,4 +35,28 @@ v_add_i32_e32 v1, s[0:1], v2, v3 // CHECK: error: invalid operand for instruction +v_addc_u32_e32 v1, vcc, v2, v3, s[2:3] +// CHECK: error: invalid operand for instruction + +v_addc_u32_e32 v1, s[0:1], v2, v3, s[2:3] +// CHECK: error: invalid operand for instruction + +v_addc_u32_e32 v1, vcc, v2, v3, -1 +// CHECK: error: invalid operand for instruction + +v_addc_u32_e32 v1, vcc, v2, v3, 123 +// CHECK: error: invalid operand for instruction + +v_addc_u32_e32 v1, vcc, v2, v3, s0 +// CHECK: error: invalid operand for instruction + +v_addc_u32_e32 v1, -1, v2, v3, s0 +// CHECK: error: invalid operand for instruction + +v_addc_u32_e64 v1, s[0:1], v2, v3, 123 +// CHECK: error: invalid operand for instruction + +v_addc_u32 v1, s[0:1], v2, v3, 123 +// CHECK: error: invalid operand for instruction + // TODO: Constant bus restrictions Index: test/MC/AMDGPU/vop2.s =================================================================== --- test/MC/AMDGPU/vop2.s +++ test/MC/AMDGPU/vop2.s @@ -307,29 +307,54 @@ // VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00] v_subrev_u32 v1, s[0:1], v2, v3 -// SICI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x50] -// VI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x38] -v_addc_u32 v1, vcc, v2, v3 +// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50] +// VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38] +v_addc_u32 v1, vcc, v2, v3, vcc -// SICI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00] -// VI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x02,0x00] -v_addc_u32 v1, s[0:1], v2, v3 +// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50] +// VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38] +v_addc_u32_e32 v1, vcc, v2, v3, vcc -// SICI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x52] -// VI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a] -v_subb_u32 v1, vcc, v2, v3 -// SICI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0x02,0x00] -// VI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0x02,0x00] -v_subb_u32 v1, s[0:1], v2, v3 +// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0xaa,0x01] +// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0xaa,0x01] +v_addc_u32 v1, s[0:1], v2, v3, vcc -// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x54] -// VI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c] -v_subbrev_u32 v1, vcc, v2, v3 +// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00] +// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00] +v_addc_u32 v1, s[0:1], v2, v3, s[2:3] -// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0x02,0x00] -// VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0x02,0x00] -v_subbrev_u32 v1, s[0:1], v2, v3 +// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00] +// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00] +v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] + +// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x06,0x03] +// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x06,0x03] +v_addc_u32_e64 v1, s[0:1], v2, v3, -1 + +// SI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0x06,0x03] +// VI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0x06,0x03] +v_addc_u32_e64 v1, vcc, v2, v3, -1 + +// SI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0xaa,0x01] +// VI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0xaa,0x01] +v_addc_u32_e64 v1, vcc, v2, v3, vcc + +// SI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x52] +// VI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3a] +v_subb_u32 v1, vcc, v2, v3, vcc + +// SI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0xaa,0x01] +// VI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0xaa,0x01] +v_subb_u32 v1, s[0:1], v2, v3, vcc + +// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x54] +// VI: v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3c] +v_subbrev_u32 v1, vcc, v2, v3, vcc + +// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0xaa,0x01] +// VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0xaa,0x01] +v_subbrev_u32 v1, s[0:1], v2, v3, vcc // SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56] // VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]