Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1778,7 +1778,7 @@
 }

 // Return type of input modifiers operand for specified input operand
-class getSrcMod {
+class getSrcMod {
   bit isFP = isFloatType.ret;
   bit isPacked = isPackedType.ret;
   Operand ret = !if(!eq(VT.Size, 64),
@@ -1788,7 +1788,7 @@
                             FP16InputMods,
                             FP32InputMods
                          ),
-                      !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
+                      Int32InputMods)
                  );
 }

@@ -1813,14 +1813,14 @@
 }

 // Return type of input modifiers operand for specified input operand for DPP
-class getSrcModVOP3DPP {
+class getSrcModVOP3DPP {
   bit isFP = isFloatType.ret;
   bit isPacked = isPackedType.ret;
   Operand ret =
     !if (isFP,
          !if (!eq(VT.Value, f16.Value),
               FP16VCSrcInputMods, FP32VCSrcInputMods),
-         !if (EnableF32SrcMods, FP32VCSrcInputMods, Int32VCSrcInputMods));
+         Int32VCSrcInputMods);
 }

 // Return type of input modifiers operand specified input operand for SDWA
@@ -2429,11 +2429,9 @@
   int Pattern = 1;
 }

-class VOPProfile _ArgVT, bit _EnableF32SrcMods = 0,
-                 bit _EnableClamp = 0> {
+class VOPProfile _ArgVT, bit _EnableClamp = 0> {

   field list ArgVT = _ArgVT;
-  field bit EnableF32SrcMods = _EnableF32SrcMods;
   field bit EnableClamp = _EnableClamp;
   field bit IsTrue16 = 0;

@@ -2459,15 +2457,15 @@
   field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT.ret;
   field RegisterOperand Src0SDWA = getSDWASrcForVT.ret;
   field RegisterOperand Src1SDWA = getSDWASrcForVT.ret;
-  field Operand Src0Mod = getSrcMod.ret;
-  field Operand Src1Mod = getSrcMod.ret;
-  field Operand Src2Mod = getSrcMod.ret;
+  field Operand Src0Mod = getSrcMod.ret;
+  field Operand Src1Mod = getSrcMod.ret;
+  field Operand Src2Mod = getSrcMod.ret;
   field Operand Src0ModDPP = getSrcModDPP.ret;
   field Operand Src1ModDPP = getSrcModDPP.ret;
   field Operand Src2ModDPP = getSrcModDPP.ret;
   field Operand Src0ModVOP3DPP = getSrcModDPP.ret;
   field Operand Src1ModVOP3DPP = getSrcModDPP.ret;
-  field Operand Src2ModVOP3DPP = getSrcModVOP3DPP.ret;
+  field Operand Src2ModVOP3DPP = getSrcModVOP3DPP.ret;
   field Operand Src0ModSDWA = getSrcModSDWA.ret;
   field Operand Src1ModSDWA = getSrcModSDWA.ret;

@@ -2481,12 +2479,10 @@
   field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value);
   field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value);

-  // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
   field bit HasSrc0FloatMods = isFloatType.ret;
   field bit HasSrc1FloatMods = isFloatType.ret;
   field bit HasSrc2FloatMods = isFloatType.ret;

-  // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
   field bit HasSrc0IntMods = isIntType.ret;
   field bit HasSrc1IntMods = isIntType.ret;
   field bit HasSrc2IntMods = isIntType.ret;
@@ -2507,8 +2503,7 @@
   field bit HasModifiers = !or(isModifierType.ret,
                                isModifierType.ret,
                                isModifierType.ret,
-                               HasOMod,
-                               EnableF32SrcMods);
+                               HasOMod);

   field bit HasSrc0Mods = HasModifiers;
   field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
@@ -2652,7 +2647,7 @@
 def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
 def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
 def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
-def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], 0, /*EnableClamp=*/1>;
+def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>;

 def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
 def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
@@ -2699,7 +2694,7 @@
 def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
 def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
 def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
-def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], 0, /*EnableClamp=*/1>;
+def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>;
 def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
 def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;

Index: llvm/lib/Target/AMDGPU/VOP2Instructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -505,7 +505,7 @@
 }

 // Write out to vcc or arbitrary SGPR.
-def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> {
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], /*EnableClamp=*/1> {
   let Asm32 = "$vdst, vcc, $src0, $src1";
   let Asm64 = "$vdst, $sdst, $src0, $src1$clamp";
   let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
@@ -532,7 +532,7 @@

 // Write out to vcc or arbitrary SGPR and read in from vcc or
 // arbitrary SGPR.
-def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> {
+def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableClamp=*/1> {
   let HasSrc2Mods = 0;
   let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
   let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
@@ -576,7 +576,7 @@
 }

 // Read in from vcc or arbitrary SGPR.
-class VOP2e_SGPR ArgVT> : VOPProfile {
+class VOP2e_SGPR ArgVT> : VOPProfile {
   let Asm32 = "$vdst, $src0, $src1";
   let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
   let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
@@ -591,14 +591,20 @@

   // Suppress src2 implied by type since the 32-bit encoding uses an
   // implicit VCC use.
-  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
+  let Ins32 = (ins VSrc_f32:$src0, Src1RC32:$src1);
+
+  let HasModifiers = 1;
+
+  // Select FP modifiers for VOP3
+  let Src0Mod = !if(!eq(Src0VT.Size, 16), FP16InputMods, FP32InputMods);
+  let Src1Mod = Src0Mod;

   let HasSrc0IntMods = 0;
   let HasSrc1IntMods = 0;
   let HasSrc0FloatMods = 1;
   let HasSrc1FloatMods = 1;
-  let InsSDWA = (ins FP32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0,
-                     FP32SDWAInputMods:$src1_modifiers, Src1SDWA:$src1,
+  let InsSDWA = (ins FP32SDWAInputMods:$src0_modifiers, SDWASrc_f32:$src0,
+                     FP32SDWAInputMods:$src1_modifiers, SDWASrc_f32:$src1,
                      clampmod:$clamp,
                      dst_sel:$dst_sel, dst_unused:$dst_unused,
                      src0_sel:$src0_sel, src1_sel:$src1_sel);
Index: llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
===================================================================
--- llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
+++ llvm/test/MC/AMDGPU/gfx10_asm_vop2.s
@@ -35,6 +35,10 @@
 // W64: encoding: [0xf7,0x04,0x0a,0x02]
 // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

+v_cndmask_b32_e32 v5, |-4.0|, v2, vcc
+// W64: encoding: [0xf6,0x04,0x0a,0x02]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
 v_cndmask_b32_e32 v5, v1, v255, vcc
 // W64: encoding: [0x01,0xff,0x0b,0x02]
 // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -115,6 +119,10 @@
 // W64: encoding: [0xf9,0x04,0x0a,0x02,0x01,0x06,0x26,0x16]
 // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

+v_cndmask_b32_sdwa v5, |0.5|, -v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// W64: encoding: [0xf9,0x04,0x0a,0x02,0xf0,0x06,0xa6,0x16]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
 v_cndmask_b32_dpp v5, -v1, |v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0
 // W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x98,0x00]
 // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -155,6 +163,10 @@
 // W32: encoding: [0xf7,0x04,0x0a,0x02]
 // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

+v_cndmask_b32_e32 v5, |-4.0|, v2, vcc_lo
+// W32: encoding: [0xf6,0x04,0x0a,0x02]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
 v_cndmask_b32_e32 v5, v1, v255, vcc_lo
 // W32: encoding: [0x01,0xff,0x0b,0x02]
 // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
@@ -235,6 +247,10 @@
 // W32: encoding: [0xf9,0x04,0x0a,0x02,0x01,0x06,0x26,0x16]
 // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode

+v_cndmask_b32_sdwa v5, |0.5|, -v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+// W32: encoding: [0xf9,0x04,0x0a,0x02,0xf0,0x06,0xa6,0x16]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
 v_cndmask_b32_sdwa v5, sext(v1), v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 // W32-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand
 // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand