Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -159,6 +159,9 @@
                                  SDValue &Clamp,
                                  SDValue &Omod) const;
 
+  bool SelectVOP3OMods(SDValue In, SDValue &Src,
+                       SDValue &Clamp, SDValue &Omod) const;
+
   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
   bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp) const;
@@ -1666,6 +1669,20 @@
   return SelectVOP3Mods(In, Src, SrcMods);
 }
 
+// Select a VOP3 source that has no input modifiers: the operand is forwarded
+// unchanged, and clamp/omod are currently always emitted as zero.
+bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
+                                         SDValue &Clamp, SDValue &Omod) const {
+  Src = In;
+
+  SDLoc DL(In);
+  // FIXME: Handle Clamp and Omod
+  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
+  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+  return true;
+}
+
 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
   unsigned Mods = 0;
Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1018,11 +1018,12 @@
 
   void cvtId(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands);
 
   void cvtVOP3Impl(MCInst &Inst,
                    const OperandVector &Operands,
                    OptionalImmIndexMap &OptionalIdx);
   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
+  void cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
 
   void cvtMIMG(MCInst &Inst, const OperandVector &Operands);
@@ -3735,6 +3736,31 @@
   }
 }
 
+// Convert the parsed operands of a VOP3 instruction that accepts clamp and
+// omod but no source modifiers: register defs are added first, then the
+// remaining operands, with clamp and omod appended last.
+void AMDGPUAsmParser::cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands) {
+  OptionalImmIndexMap OptionalIdx;
+
+  unsigned I = 1;
+  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
+    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
+  }
+
+  for (unsigned E = Operands.size(); I != E; ++I) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+    if (Op.isMod()) {
+      OptionalIdx[Op.getImmTy()] = I;
+    } else {
+      Op.addRegOrImmOperands(Inst, 1);
+    }
+  }
+
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+}
+
 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
   OptionalImmIndexMap OptIdx;
 
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -659,6 +659,8 @@
 // VOP3Mods, but the input source is known to never be NaN.
 def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
 
+def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
+
 def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
 def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;
 
Index: lib/Target/AMDGPU/VOP1Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP1Instructions.td
+++ lib/Target/AMDGPU/VOP1Instructions.td
@@ -85,10 +85,17 @@
 }
 
 class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
-  list<dag> ret = !if(P.HasModifiers,
-    [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
-                              i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
-    [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]);
+  list<dag> ret =
+    !if(P.HasModifiers,
+        [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
+                                              i32:$src0_modifiers,
+                                              i1:$clamp, i32:$omod))))],
+        !if(P.HasOMod,
+            [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
+                                                  i1:$clamp, i32:$omod))))],
+            [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
+        )
+    );
 }
 
 multiclass VOP1Inst <string opName, VOPProfile P,
@@ -98,6 +105,23 @@
   def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
 }
 
+// Special profile for instructions which have clamp
+// and output modifiers (but have no input modifiers)
+class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
+  VOPProfile<[dstVt, srcVt, untyped, untyped]> {
+
+  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
+  let Asm64 = "$vdst, $src0$clamp$omod";
+
+  let HasModifiers = 0;
+  let HasClamp = 1;
+  let HasOMod = 1;
+}
+
+def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
+def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
+def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
+
 //===----------------------------------------------------------------------===//
 // VOP1 Instructions
 //===----------------------------------------------------------------------===//
@@ -144,24 +168,24 @@
 
 let SchedRW = [WriteQuarterRate32] in {
 defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
-defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>;
-defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>;
-defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>;
+defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
+defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
+defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
 defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
 defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
 defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
 defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
 defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
 defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
-defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>;
+defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
 defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
 defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
-defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>;
-defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>;
-defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>;
-defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>;
+defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
+defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
+defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
+defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
 defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
-defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>;
+defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
 } // End SchedRW = [WriteQuarterRate32]
 
 defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
@@ -299,8 +323,8 @@
 
 let SubtargetPredicate = isVI in {
 
-defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16, uint_to_fp>;
-defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16, sint_to_fp>;
+defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
+defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
 defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
 defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
 defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
Index: lib/Target/AMDGPU/VOPInstructions.td
===================================================================
--- lib/Target/AMDGPU/VOPInstructions.td
+++ lib/Target/AMDGPU/VOPInstructions.td
@@ -107,8 +107,12 @@
   let AsmVariantName = AMDGPUAsmVariants.VOP3;
   let AsmMatchConverter =
     !if(!eq(VOP3Only,1),
-        !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
-        !if(!eq(P.HasModifiers, 1), "cvtVOP3_2_mod", ""));
+      !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
+      !if(!eq(P.HasModifiers, 1),
+        "cvtVOP3_2_mod",
+        !if(!eq(P.HasOMod, 1), "cvtVOP3OMod", "")
+      )
+    );
 
   VOPProfile Pfl = P;
 }
Index: test/MC/AMDGPU/vop3-modifiers.s
===================================================================
--- test/MC/AMDGPU/vop3-modifiers.s
+++ test/MC/AMDGPU/vop3-modifiers.s
@@ -255,4 +255,134 @@
 // CHECK: [0x00,0x00,0xc4,0xd1,0x00,0x00,0xe0,0x83]
 
 v_cubeid_f32 v0, s0, s0, abs(0x3e22f983)
-// CHECK: [0x00,0x04,0xc4,0xd1,0x00,0x00,0xe0,0x03]
\ No newline at end of file
+// CHECK: [0x00,0x04,0xc4,0xd1,0x00,0x00,0xe0,0x03]
+
+
+//---------------------------------------------------------------------------//
+// VOP3 Instructions without Input Modifiers but with Output Modifiers
+//---------------------------------------------------------------------------//
+
+v_cvt_f64_i32_e64 v[5:6], s1 clamp
+// CHECK: [0x05,0x80,0x44,0xd1,0x01,0x00,0x00,0x00]
+
+v_cvt_f64_i32_e64 v[5:6], s1 mul:2
+// CHECK: [0x05,0x00,0x44,0xd1,0x01,0x00,0x00,0x08]
+
+v_cvt_f64_i32_e64 v[5:6], s1 mul:4
+// CHECK: [0x05,0x00,0x44,0xd1,0x01,0x00,0x00,0x10]
+
+v_cvt_f64_i32_e64 v[5:6], s1 div:2
+// CHECK: [0x05,0x00,0x44,0xd1,0x01,0x00,0x00,0x18]
+
+
+v_cvt_f64_u32_e64 v[5:6], s1 clamp
+// CHECK: [0x05,0x80,0x56,0xd1,0x01,0x00,0x00,0x00]
+
+v_cvt_f64_u32_e64 v[5:6], s1 mul:2
+// CHECK: [0x05,0x00,0x56,0xd1,0x01,0x00,0x00,0x08]
+
+v_cvt_f64_u32_e64 v[5:6], s1 mul:4
+// CHECK: [0x05,0x00,0x56,0xd1,0x01,0x00,0x00,0x10]
+
+v_cvt_f64_u32_e64 v[5:6], s1 div:2
+// CHECK: [0x05,0x00,0x56,0xd1,0x01,0x00,0x00,0x18]
+
+
+v_cvt_f32_i32_e64 v5, s1 clamp
+// CHECK: [0x05,0x80,0x45,0xd1,0x01,0x00,0x00,0x00]
+
+v_cvt_f32_i32_e64 v5, s1 mul:2
+// CHECK: [0x05,0x00,0x45,0xd1,0x01,0x00,0x00,0x08]
+
+v_cvt_f32_i32_e64 v5, s1 mul:4
+// CHECK: [0x05,0x00,0x45,0xd1,0x01,0x00,0x00,0x10]
+
+v_cvt_f32_i32_e64 v5, s1 div:2
+// CHECK: [0x05,0x00,0x45,0xd1,0x01,0x00,0x00,0x18]
+
+
+v_cvt_f32_u32_e64 v5, s1 clamp
+// CHECK: [0x05,0x80,0x46,0xd1,0x01,0x00,0x00,0x00]
+
+v_cvt_f32_u32_e64 v5, s1 mul:2
+// CHECK: [0x05,0x00,0x46,0xd1,0x01,0x00,0x00,0x08]
+
+v_cvt_f32_u32_e64 v5, s1 mul:4
+// CHECK: [0x05,0x00,0x46,0xd1,0x01,0x00,0x00,0x10]
+
+v_cvt_f32_u32_e64 v5, s1 div:2
+// CHECK: [0x05,0x00,0x46,0xd1,0x01,0x00,0x00,0x18]
+
+
+v_cvt_off_f32_i4_e64 v5, s1 clamp
+// CHECK: [0x05,0x80,0x4e,0xd1,0x01,0x00,0x00,0x00]
+
+v_cvt_off_f32_i4_e64 v5, s1 mul:2
+// CHECK: [0x05,0x00,0x4e,0xd1,0x01,0x00,0x00,0x08]
+
+v_cvt_off_f32_i4_e64 v5, s1 mul:4
+// CHECK: [0x05,0x00,0x4e,0xd1,0x01,0x00,0x00,0x10]
+
+v_cvt_off_f32_i4_e64 v5, s1 div:2
+// CHECK: [0x05,0x00,0x4e,0xd1,0x01,0x00,0x00,0x18]
+
+
+v_cvt_f32_ubyte0_e64 v5, s1 clamp
+// CHECK: [0x05,0x80,0x51,0xd1,0x01,0x00,0x00,0x00]
+
+v_cvt_f32_ubyte0_e64 v5, s1 mul:2
+// CHECK: [0x05,0x00,0x51,0xd1,0x01,0x00,0x00,0x08]
+
+v_cvt_f32_ubyte0_e64 v5, s1 mul:4
+// CHECK: [0x05,0x00,0x51,0xd1,0x01,0x00,0x00,0x10]
+
+v_cvt_f32_ubyte0_e64 v5, s1 div:2
+// CHECK: [0x05,0x00,0x51,0xd1,0x01,0x00,0x00,0x18]
+
+
+v_cvt_f32_ubyte1_e64 v5, s1 clamp
+// CHECK: [0x05,0x80,0x52,0xd1,0x01,0x00,0x00,0x00]
+
+v_cvt_f32_ubyte1_e64 v5, s1 mul:2
+// CHECK: [0x05,0x00,0x52,0xd1,0x01,0x00,0x00,0x08]
+
+v_cvt_f32_ubyte1_e64 v5, s1 mul:4
+// CHECK: [0x05,0x00,0x52,0xd1,0x01,0x00,0x00,0x10]
+
+v_cvt_f32_ubyte1_e64 v5, s1 div:2
+// CHECK: [0x05,0x00,0x52,0xd1,0x01,0x00,0x00,0x18]
+
+
+v_cvt_f32_ubyte2_e64 v5, s1 clamp
+// CHECK: [0x05,0x80,0x53,0xd1,0x01,0x00,0x00,0x00]
+
+v_cvt_f32_ubyte2_e64 v5, s1 mul:2
+// CHECK: [0x05,0x00,0x53,0xd1,0x01,0x00,0x00,0x08]
+
+v_cvt_f32_ubyte2_e64 v5, s1 mul:4
+// CHECK: [0x05,0x00,0x53,0xd1,0x01,0x00,0x00,0x10]
+
+v_cvt_f32_ubyte2_e64 v5, s1 div:2
+// CHECK: [0x05,0x00,0x53,0xd1,0x01,0x00,0x00,0x18]
+
+
+v_cvt_f32_ubyte3_e64 v5, s1 clamp
+// CHECK: [0x05,0x80,0x54,0xd1,0x01,0x00,0x00,0x00]
+
+v_cvt_f32_ubyte3_e64 v5, s1 mul:2
+// CHECK: [0x05,0x00,0x54,0xd1,0x01,0x00,0x00,0x08]
+
+v_cvt_f32_ubyte3_e64 v5, s1 mul:4
+// CHECK: [0x05,0x00,0x54,0xd1,0x01,0x00,0x00,0x10]
+
+v_cvt_f32_ubyte3_e64 v5, s1 div:2
+// CHECK: [0x05,0x00,0x54,0xd1,0x01,0x00,0x00,0x18]
+
+
+// NB: output modifiers are not supported for f16
+v_cvt_f16_i16_e64 v5, s1 clamp
+// CHECK: [0x05,0x80,0x7a,0xd1,0x01,0x00,0x00,0x00]
+
+// NB: output modifiers are not supported for f16
+v_cvt_f16_u16_e64 v5, s1 clamp
+// CHECK: [0x05,0x80,0x79,0xd1,0x01,0x00,0x00,0x00]