Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -80,6 +80,7 @@ bool IsFPImm; ImmTy Type; int64_t Val; + int Modifiers; }; struct RegOp { @@ -110,15 +111,20 @@ } void addRegOrImmOperands(MCInst &Inst, unsigned N) const { - if (isReg()) + if (isRegKind()) addRegOperands(Inst, N); else addImmOperands(Inst, N); } - void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { - Inst.addOperand(MCOperand::createImm(Reg.Modifiers)); - addRegOperands(Inst, N); + void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { + if (isRegKind()) { + Inst.addOperand(MCOperand::createImm(Reg.Modifiers)); + addRegOperands(Inst, N); + } else { + Inst.addOperand(MCOperand::createImm(Imm.Modifiers)); + addImmOperands(Inst, N); + } } void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { @@ -144,12 +150,14 @@ return Kind == Immediate; } - bool isInlineImm() const { - float F = BitsToFloat(Imm.Val); - // TODO: Add 0.5pi for VI - return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) || + bool isInlinableImm() const { + if (!isImm() || Imm.Type != AMDGPUOperand::ImmTyNone) // Only plain immediates are inlinable (e.g. 
"clamp" attribute is not) + return false; + const float F = BitsToFloat(Imm.Val); + // TODO: Add 1/(2*pi) for VI + return (Imm.Val <= 64 && Imm.Val >= -16) || (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 || - F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0)); + F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0); } bool isDSOffset0() const { @@ -179,8 +187,8 @@ return Kind == Register && Reg.Modifiers == 0; } - bool isRegWithInputMods() const { - return Kind == Register; + bool isRegOrImmWithInputMods() const { + return Kind == Register || isInlinableImm(); } bool isClamp() const { @@ -196,13 +204,16 @@ } void setModifiers(unsigned Mods) { - assert(isReg()); - Reg.Modifiers = Mods; + assert(isReg() || (isImm() && Imm.Modifiers == 0)); + if (isReg()) + Reg.Modifiers = Mods; + else + Imm.Modifiers = Mods; } bool hasModifiers() const { - assert(isRegKind()); - return Reg.Modifiers != 0; + assert(isRegKind() || isImm()); + return isRegKind() ? Reg.Modifiers != 0 : Imm.Modifiers != 0; } unsigned getReg() const override { @@ -218,36 +229,39 @@ } bool isSCSrc32() const { - return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID)); + return isInlinableImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID)); } - bool isSSrc32() const { - return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID)); + bool isSCSrc64() const { + return isInlinableImm() || (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)); } - bool isSSrc64() const { - return isImm() || isInlineImm() || - (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)); + bool isSSrc32() const { + return isImm() || isSCSrc32(); } - bool isSCSrc64() const { - return (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)) || isInlineImm(); + bool isSSrc64() const { + // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. See isVSrc64(). 
+ return isImm() || isSCSrc64(); } bool isVCSrc32() const { - return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); + return isInlinableImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); } bool isVCSrc64() const { - return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID)); + return isInlinableImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID)); } bool isVSrc32() const { - return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); + return isImm() || isVCSrc32(); } bool isVSrc64() const { - return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID)); + // TODO: Check if the 64-bit value (coming from assembly source) can be narrowed to 32 bits (in the instruction stream). + // That requires knowledge of the instruction type (unsigned/signed, floating or "untyped"/B64), see [AMD GCN3 ISA 6.3.1] + // How are 64-bit values formed from 32-bit literals in _B64 instructions? + return isImm() || isVCSrc64(); } bool isMem() const override { @@ -276,7 +290,10 @@ OS << "<register " << getReg() << " mods: " << Reg.Modifiers << '>'; break; case Immediate: - OS << getImm(); + if (Imm.Type != AMDGPUOperand::ImmTyNone) + OS << getImm(); + else + OS << "<" << getImm() << " mods: " << Imm.Modifiers << '>'; break; case Token: OS << '\'' << getToken() << '\''; @@ -294,6 +311,7 @@ Op->Imm.Val = Val; Op->Imm.IsFPImm = IsFPImm; Op->Imm.Type = Type; + Op->Imm.Modifiers = 0; Op->StartLoc = Loc; Op->EndLoc = Loc; return Op; @@ -948,6 +966,8 @@ const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]); if (Op.isRegKind() && Op.hasModifiers()) return true; + if (Op.isImm() && Op.hasModifiers()) + return true; if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod || Op.getImmTy() == AMDGPUOperand::ImmTyClamp)) return true; @@ -1692,7 +1712,7 @@ // previous register operands have modifiers for (unsigned i = 2, e = Operands.size(); i != e; ++i) { AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]); - if (Op.isReg()) + if ((Op.isReg() || Op.isImm()) && !Op.hasModifiers()) 
Op.setModifiers(0); } } @@ -1741,14 +1761,12 @@ unsigned ClampIdx = 0, OModIdx = 0; for (unsigned E = Operands.size(); I != E; ++I) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - if (Op.isRegWithInputMods()) { - Op.addRegWithInputModsOperands(Inst, 2); + if (Op.isRegOrImmWithInputMods()) { + Op.addRegOrImmWithInputModsOperands(Inst, 2); } else if (Op.isClamp()) { ClampIdx = I; } else if (Op.isOMod()) { OModIdx = I; - } else if (Op.isImm()) { - Op.addImmOperands(Inst, 1); } else { assert(false); } Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -1048,7 +1048,7 @@ } def InputModsMatchClass : AsmOperandClass { - let Name = "RegWithInputMods"; + let Name = "RegOrImmWithInputMods"; } def InputModsNoDefault : Operand <i32> { Index: test/MC/AMDGPU/vop3-errs.s =================================================================== --- test/MC/AMDGPU/vop3-errs.s +++ test/MC/AMDGPU/vop3-errs.s @@ -3,3 +3,6 @@ v_add_f32_e64 v0, v1 // CHECK: error: too few operands for instruction + +v_div_scale_f32 v24, vcc, v22, 1.1, v22 +// CHECK: error: invalid operand for instruction Index: test/MC/AMDGPU/vop3.s =================================================================== --- test/MC/AMDGPU/vop3.s +++ test/MC/AMDGPU/vop3.s @@ -289,3 +289,11 @@ v_div_scale_f32 v24, s[10:11], v22, v22, v20 // SICI: v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xda,0xd2,0x16,0x2d,0x52,0x04] // VI: v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xe0,0xd1,0x16,0x2d,0x52,0x04] + +v_div_scale_f32 v24, vcc, v22, 1.0, v22 +// SICI: v_div_scale_f32 v24, vcc, v22, 1.0, v22 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0xe5,0x59,0x04] +// VI: v_div_scale_f32 v24, vcc, v22, 1.0, v22 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0xe5,0x59,0x04] + +v_div_scale_f32 v24, vcc, v22, v22, -2.0 +// SICI: v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: 
[0x18,0x6a,0xda,0xd2,0x16,0x2d,0xd6,0x03] +// VI: v_div_scale_f32 v24, vcc, v22, v22, -2.0 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0xd6,0x03]