Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -65,10 +65,6 @@ [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>] >; -def AMDGPUAddeSubeOp : SDTypeProfile<2, 3, - [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>] ->; - //===----------------------------------------------------------------------===// // AMDGPU DAG Nodes // @@ -203,12 +199,6 @@ // out = (src1 > src0) ? 1 : 0 def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>; -// TODO: remove AMDGPUadde/AMDGPUsube when ADDCARRY/SUBCARRY get their own -// nodes in TargetSelectionDAG.td. -def AMDGPUadde : SDNode<"ISD::ADDCARRY", AMDGPUAddeSubeOp, []>; - -def AMDGPUsube : SDNode<"ISD::SUBCARRY", AMDGPUAddeSubeOp, []>; - def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT> ]>; @@ -249,7 +239,8 @@ // Special case divide FMA with scale and flags (src0 = Quotient, // src1 = Denominator, src2 = Numerator). -def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>; +def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp, + [SDNPOptInGlue]>; // Single or double precision division fixup. 
// Special case divide fixup and flags(src0 = Quotient, src1 = Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -236,7 +236,7 @@ } bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { - return isRegClass(RCID) || isInlinableImm(type); + return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); } bool isRegOrImmWithInt16InputMods() const { @@ -461,7 +461,7 @@ } bool isVSrcB32() const { - return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); + return isVCSrcF32() || isLiteralImm(MVT::i32); } bool isVSrcB64() const { @@ -473,12 +473,11 @@ } bool isVSrcV2B16() const { - llvm_unreachable("cannot happen"); - return isVSrcB16(); + return isVSrcB16() || isLiteralImm(MVT::v2i16); } bool isVSrcF32() const { - return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); + return isVCSrcF32() || isLiteralImm(MVT::f32); } bool isVSrcF64() const { @@ -490,8 +489,7 @@ } bool isVSrcV2F16() const { - llvm_unreachable("cannot happen"); - return isVSrcF16(); + return isVSrcF16() || isLiteralImm(MVT::v2f16); } bool isKImmFP32() const { @@ -1145,6 +1143,7 @@ bool validateMIMGD16(const MCInst &Inst); bool validateMIMGDim(const MCInst &Inst); bool validateLdsDirect(const MCInst &Inst); + bool validateVOP3Literal(const MCInst &Inst) const; bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; @@ -1287,6 +1286,8 @@ case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: return &APFloat::IEEEhalf(); default: llvm_unreachable("unsupported fp type"); @@ -1419,8 +1420,14 @@ return false; } + // We allow fp literals 
with f16x2 operands assuming that the specified + // literal goes into the lower half and the upper half is zero. We also + // require that the literal may be losslessly converted to f16. + MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : + (type == MVT::v2i16)? MVT::i16 : type; + APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); - return canLosslesslyConvertToFPType(FPLiteral, type); + return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); } bool AMDGPUOperand::isRegClass(unsigned RCID) const { @@ -1535,7 +1542,9 @@ case AMDGPU::OPERAND_REG_INLINE_C_INT16: case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: - case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: { bool lost; APFloat FPLiteral(APFloat::IEEEdouble(), Literal); // Convert literal to single precision @@ -1562,6 +1571,8 @@ case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: if (isSafeTruncation(Val, 32) && AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), AsmParser->hasInv2PiInlineImm())) { @@ -2419,7 +2430,9 @@ case 2: { const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || - OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { + OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || + OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || + OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); } else { return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); @@ -2919,6 +2932,42 @@ return NumLiterals <= 1; } +// VOP3 literal is only allowed in GFX10+ and only one can be used +bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { + unsigned Opcode =
Inst.getOpcode(); + const MCInstrDesc &Desc = MII.get(Opcode); + if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) + return true; + + const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); + const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); + const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); + + const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; + + unsigned NumLiterals = 0; + uint32_t LiteralValue; + + for (int OpIdx : OpIndices) { + if (OpIdx == -1) break; + + const MCOperand &MO = Inst.getOperand(OpIdx); + if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) + continue; + + if (!isInlineConstant(Inst, OpIdx)) { + uint32_t Value = static_cast<uint32_t>(MO.getImm()); + if (NumLiterals == 0 || LiteralValue != Value) { + LiteralValue = Value; + ++NumLiterals; + } + } + } + + return !NumLiterals || + (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); +} + bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, const SMLoc &IDLoc) { if (!validateLdsDirect(Inst)) { @@ -2931,6 +2980,11 @@ "only one literal operand is allowed"); return false; } + if (!validateVOP3Literal(Inst)) { + Error(IDLoc, + "invalid literal operand"); + return false; + } if (!validateConstantBusLimitations(Inst)) { Error(IDLoc, "invalid operand (violates constant bus restrictions)"); Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -618,6 +618,14 @@ case AMDGPU::OPERAND_REG_IMM_FP16: printImmediate16(Op.getImm(), STI, O); break; + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: + if (!isUInt<16>(Op.getImm()) && + STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { + printImmediate32(Op.getImm(), STI, O); + break; + } + LLVM_FALLTHROUGH; case
AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: printImmediateV216(Op.getImm(), STI, O); Index: lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -249,6 +249,11 @@ // which does not have f16 support? return getLit16Encoding(static_cast<uint16_t>(Imm), STI); + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: + if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) + return getLit32Encoding(static_cast<uint32_t>(Imm), STI); + LLVM_FALLTHROUGH; case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { uint16_t Lo16 = static_cast<uint16_t>(Imm); Index: lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- lib/Target/AMDGPU/SIFoldOperands.cpp +++ lib/Target/AMDGPU/SIFoldOperands.cpp @@ -165,13 +165,16 @@ static bool updateOperand(FoldCandidate &Fold, const SIInstrInfo &TII, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI, + const GCNSubtarget &ST) { MachineInstr *MI = Fold.UseMI; MachineOperand &Old = MI->getOperand(Fold.UseOpNo); assert(Old.isReg()); if (Fold.isImm()) { - if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked) { + if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked && + AMDGPU::isInlinableLiteralV216(static_cast<uint16_t>(Fold.ImmToFold), + ST.hasInv2PiInlineImm())) { // Set op_sel/op_sel_hi on this operand or bail out if op_sel is // already set. unsigned Opcode = MI->getOpcode(); @@ -192,6 +195,8 @@ // Only apply the following transformation if that operand requries // a packed immediate.
switch (TII.get(Opcode).OpInfo[OpNo].OperandType) { + case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: // If upper part is all zero we do not need op_sel_hi. @@ -203,6 +208,8 @@ return true; } Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); + Old.ChangeToImmediate(Fold.ImmToFold & 0xffff); + return true; } break; default: @@ -891,7 +898,7 @@ Copy->addImplicitDefUseOperands(*MF); for (FoldCandidate &Fold : FoldList) { - if (updateOperand(Fold, *TII, *TRI)) { + if (updateOperand(Fold, *TII, *TRI, *ST)) { // Clear kill flags. if (Fold.isReg()) { assert(Fold.OpToFold && Fold.OpToFold->isReg()); Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2549,19 +2549,12 @@ return false; } + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { - if (isUInt<16>(Imm)) { - int16_t Trunc = static_cast<int16_t>(Imm); - return ST.has16BitInsts() && - AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm()); - } - if (!(Imm & 0xffff)) { - return ST.has16BitInsts() && - AMDGPU::isInlinableLiteral16(Imm >> 16, ST.hasInv2PiInlineImm()); - } uint32_t Trunc = static_cast<uint32_t>(Imm); - return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm()); + return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm()); } default: llvm_unreachable("invalid bitwidth"); @@ -2603,7 +2596,8 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, const MachineOperand &MO) const { - const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo]; + const MCInstrDesc &InstDesc = MI.getDesc(); + const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo]; assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); @@ -2616,7 +2610,15 @@ if
(MO.isImm() && isInlineConstant(MO, OpInfo)) return RI.opCanUseInlineConstant(OpInfo.OperandType); - return RI.opCanUseLiteralConstant(OpInfo.OperandType); + if (!RI.opCanUseLiteralConstant(OpInfo.OperandType)) + return false; + + if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo)) + return true; + + const MachineFunction *MF = MI.getParent()->getParent(); + const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); + return ST.hasVOP3Literal(); } bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { @@ -3600,17 +3602,14 @@ MachineOperand &Src1 = MI.getOperand(Src1Idx); // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32 - // we need to only have one constant bus use. - // - // Note we do not need to worry about literal constants here. They are - // disabled for the operand type for instructions because they will always - // violate the one constant bus use rule. + // we need to only have one constant bus use before GFX10. bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister; if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1) { int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); MachineOperand &Src0 = MI.getOperand(Src0Idx); - if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) + if (Src0.isReg() && (RI.isSGPRReg(MRI, Src0.getReg()) || + isLiteralConstantLike(Src0, InstrDesc.OpInfo[Src0Idx]))) legalizeOpWithMove(MI, Src0Idx); } @@ -3702,10 +3701,8 @@ Src1.setSubReg(Src0SubReg); } -// Legalize VOP3 operands. Because all operand types are supported for any -// operand, and since literal constants are not allowed and should never be -// seen, we only need to worry about inserting copies if we use multiple SGPR -// operands. +// Legalize VOP3 operands. All operand types are supported for any operand +// but only one literal constant and only starting from GFX10.
void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); @@ -5732,18 +5729,29 @@ SIEncodingFamily Gen = subtargetEncodingFamily(ST); if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 && - ST.getGeneration() >= AMDGPUSubtarget::GFX9) + ST.getGeneration() == AMDGPUSubtarget::GFX9) Gen = SIEncodingFamily::GFX9; - if (get(Opcode).TSFlags & SIInstrFlags::SDWA) - Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9 - : SIEncodingFamily::SDWA; // Adjust the encoding family to GFX80 for D16 buffer instructions when the // subtarget has UnpackedD16VMem feature. // TODO: remove this when we discard GFX80 encoding. if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf)) Gen = SIEncodingFamily::GFX80; + if (get(Opcode).TSFlags & SIInstrFlags::SDWA) { + switch (ST.getGeneration()) { + default: + Gen = SIEncodingFamily::SDWA; + break; + case AMDGPUSubtarget::GFX9: + Gen = SIEncodingFamily::SDWA9; + break; + case AMDGPUSubtarget::GFX10: + Gen = SIEncodingFamily::SDWA10; + break; + } + } + int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); // -1 means that Opcode is already a native instruction. 
Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -1209,7 +1209,7 @@ !if(!eq(VT.Value, f16.Value), VSrc_f16, !if(!eq(VT.Value, v2f16.Value), - VCSrc_v2f16, + VSrc_v2f16, VSrc_f32 ) ) @@ -1219,7 +1219,7 @@ !if(!eq(VT.Value, i16.Value), VSrc_b16, !if(!eq(VT.Value, v2i16.Value), - VCSrc_v2b16, + VSrc_v2b16, VSrc_b32 ) ) @@ -1255,23 +1255,23 @@ VSrc_128, !if(!eq(VT.Size, 64), !if(isFP, - VCSrc_f64, - VCSrc_b64), + VSrc_f64, + VSrc_b64), !if(!eq(VT.Value, i1.Value), SCSrc_i1, !if(isFP, !if(!eq(VT.Value, f16.Value), - VCSrc_f16, + VSrc_f16, !if(!eq(VT.Value, v2f16.Value), - VCSrc_v2f16, - VCSrc_f32 + VSrc_v2f16, + VSrc_f32 ) ), !if(!eq(VT.Value, i16.Value), - VCSrc_b16, + VSrc_b16, !if(!eq(VT.Value, v2i16.Value), - VCSrc_v2b16, - VCSrc_b32 + VSrc_v2b16, + VSrc_b32 ) ) ) Index: test/CodeGen/AMDGPU/pk_max_f16_literal.ll =================================================================== --- test/CodeGen/AMDGPU/pk_max_f16_literal.ll +++ test/CodeGen/AMDGPU/pk_max_f16_literal.ll @@ -1,7 +1,8 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX10 %s ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_1: -; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}} +; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}} define amdgpu_kernel void @test_pk_max_f16_literal_0_1(<2 x half> addrspace(1)* nocapture %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -14,7 +15,7 @@ } ; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_0: -; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}} +; GFX9_10: v_pk_max_f16 
v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}} define amdgpu_kernel void @test_pk_max_f16_literal_1_0(<2 x half> addrspace(1)* nocapture %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -27,7 +28,7 @@ } ; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_1: -; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel_hi:[1,0]{{$}} +; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel_hi:[1,0]{{$}} define amdgpu_kernel void @test_pk_max_f16_literal_1_1(<2 x half> addrspace(1)* nocapture %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -40,7 +41,7 @@ } ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_m1: -; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}} +; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}} define amdgpu_kernel void @test_pk_max_f16_literal_0_m1(<2 x half> addrspace(1)* nocapture %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -53,7 +54,7 @@ } ; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_0: -; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0{{$}} +; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0{{$}} define amdgpu_kernel void @test_pk_max_f16_literal_m1_0(<2 x half> addrspace(1)* nocapture %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -66,7 +67,7 @@ } ; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_m1: -; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel_hi:[1,0]{{$}} +; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel_hi:[1,0]{{$}} define amdgpu_kernel void @test_pk_max_f16_literal_m1_m1(<2 x half> addrspace(1)* nocapture %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -79,7 +80,7 @@ } ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_0: -; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}} +; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}} define amdgpu_kernel void @test_pk_max_f16_literal_0_0(<2 x half> addrspace(1)* nocapture %arg) { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() 
@@ -91,5 +92,50 @@ ret void } +; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_41c8: +; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c80000 +; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}} +; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}} op_sel:[1,0] op_sel_hi:[0,1]{{$}} +define amdgpu_kernel void @test_pk_max_f16_literal_0_41c8(<2 x half> addrspace(1)* nocapture %arg) { +bb: + %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() + %tmp1 = zext i32 %tmp to i64 + %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1 + %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4 + %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH0000, half 0xH41C8>) + store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4 + ret void +} + +; GCN-LABEL: {{^}}test_pk_max_f16_literal_41c8_0: +; GFX9: s_movk_i32 [[C:s[0-9]+]], 0x41c8 +; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}} +; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}}{{$}} +define amdgpu_kernel void @test_pk_max_f16_literal_41c8_0(<2 x half> addrspace(1)* nocapture %arg) { +bb: + %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() + %tmp1 = zext i32 %tmp to i64 + %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64 %tmp1 + %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4 + %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH41C8, half 0xH0000>) + store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4 + ret void +} + +; GCN-LABEL: {{^}}test_pk_max_f16_literal_42ca_41c8: +; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c842ca +; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}} +; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c842ca, v{{[0-9]+}}{{$}} +define amdgpu_kernel void @test_pk_max_f16_literal_42ca_41c8(<2 x half> addrspace(1)* nocapture %arg) { +bb: + %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() + %tmp1 = zext i32 %tmp to i64 + %tmp2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i64
%tmp1 + %tmp3 = load <2 x half>, <2 x half> addrspace(1)* %tmp2, align 4 + %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH42CA, half 0xH41C8>) + store <2 x half> %tmp4, <2 x half> addrspace(1)* %tmp2, align 4 + ret void +} + declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) declare i32 @llvm.amdgcn.workitem.id.x() Index: test/MC/AMDGPU/expressions.s =================================================================== --- test/MC/AMDGPU/expressions.s +++ test/MC/AMDGPU/expressions.s @@ -41,9 +41,6 @@ s_mov_b32 s0, foo+2 // VI: s_mov_b32 s0, 514 ; encoding: [0xff,0x00,0x80,0xbe,0x02,0x02,0x00,0x00] -v_mul_f32 v0, foo+2, v2 -// VI: v_mul_f32_e32 v0, 514, v2 ; encoding: [0xff,0x04,0x00,0x0a,0x02,0x02,0x00,0x00] - BB1: v_nop_e64 BB2: @@ -80,23 +77,24 @@ v=1 v_sin_f32 v0, -v -// VI: v_sin_f32_e32 v0, -1 ; encoding: [0xc1,0x52,0x00,0x7e] +// NOVI: error: invalid operand for instruction +v=1 v_sin_f32 v0, -v[0] // VI: v_sin_f32_e64 v0, -v0 ; encoding: [0x00,0x00,0x69,0xd1,0x00,0x01,0x00,0x20] s=1 -v_sin_f32 v0, -s -// VI: v_sin_f32_e32 v0, -1 ; encoding: [0xc1,0x52,0x00,0x7e] +s_not_b32 s0, -s +// VI: s_not_b32 s0, -1 ; encoding: [0xc1,0x04,0x80,0xbe] s0=1 -v_sin_f32 v0, -s0 -// VI: v_sin_f32_e64 v0, -s0 ; encoding: [0x00,0x00,0x69,0xd1,0x00,0x00,0x00,0x20] +s_not_b32 s0, -s0 +// VI: s_not_b32 s0, -1 ; encoding: [0xc1,0x04,0x80,0xbe] ttmp=1 -v_sin_f32 v0, -ttmp -// VI: v_sin_f32_e32 v0, -1 ; encoding: [0xc1,0x52,0x00,0x7e] +s_not_b32 s0, -ttmp +// VI: s_not_b32 s0, -1 ; encoding: [0xc1,0x04,0x80,0xbe] ttmp0=1 -v_sin_f32 v0, -[ttmp0] -// VI: v_sin_f32_e64 v0, -ttmp0 ; encoding: [0x00,0x00,0x69,0xd1,0x70,0x00,0x00,0x20] +s_not_b32 s0, -[ttmp0] +// VI: s_not_b32 s0, -1 ; encoding: [0xc1,0x04,0x80,0xbe] Index: test/MC/AMDGPU/gfx10-constant-bus.s =================================================================== --- /dev/null +++ test/MC/AMDGPU/gfx10-constant-bus.s @@ -0,0 +1,35 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 |
FileCheck -check-prefix=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10-ERR %s + +//----------------------------------------------------------------------------------------- +// On GFX10 we can use two scalar operands + +v_add_f32 v0, s0, s1 +// GFX10: v_add_f32_e64 v0, s0, s1 ; encoding: [0x00,0x00,0x03,0xd5,0x00,0x02,0x00,0x00] + +v_madak_f32 v0, s0, v1, 42.42 +// GFX10: v_madak_f32 v0, s0, v1, 0x4229ae14 ; encoding: [0x00,0x02,0x00,0x42,0x14,0xae,0x29,0x42] + +v_med3_f32 v0, s0, s0, s1 +// GFX10: v_med3_f32 v0, s0, s0, s1 ; encoding: [0x00,0x00,0x57,0xd5,0x00,0x00,0x04,0x00] + +//----------------------------------------------------------------------------------------- +// v_div_fmas implicitly reads VCC, so only one scalar operand is possible + +v_div_fmas_f32 v5, s3, s3, s3 +// GFX10: v_div_fmas_f32 v5, s3, s3, s3 ; encoding: [0x05,0x00,0x6f,0xd5,0x03,0x06,0x0c,0x00] + +v_div_fmas_f32 v5, s3, s3, s2 +// GFX10-ERR: error: invalid operand (violates constant bus restrictions) + +v_div_fmas_f32 v5, s3, 0x123, v3 +// GFX10-ERR: error: invalid operand (violates constant bus restrictions) + +v_div_fmas_f64 v[5:6], 0x12345678, 0x12345678, 0x12345678 +// GFX10: v_div_fmas_f64 v[5:6], 0x12345678, 0x12345678, 0x12345678 ; encoding: [0x05,0x00,0x70,0xd5,0xff,0xfe,0xfd,0x03,0x78,0x56,0x34,0x12] + +v_div_fmas_f64 v[5:6], v[1:2], s[2:3], v[3:4] +// GFX10: v_div_fmas_f64 v[5:6], v[1:2], s[2:3], v[3:4] ; encoding: [0x05,0x00,0x70,0xd5,0x01,0x05,0x0c,0x04] + +v_div_fmas_f64 v[5:6], v[1:2], s[2:3], 0x123456 +// GFX10-ERR: error: invalid operand (violates constant bus restrictions) Index: test/MC/AMDGPU/literals.s =================================================================== --- test/MC/AMDGPU/literals.s +++ test/MC/AMDGPU/literals.s @@ -282,12 +282,12 @@ // GFX89: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x64,0x00,0x7e,0xd2,0x04,0x00,0x00] v_fract_f64_e32 v[0:1], 1234 -// NOSICI: error: invalid operand 
for instruction -// NOGFX89: error: invalid operand for instruction +// NOSICI: error: invalid literal operand +// NOGFX89: error: invalid literal operand v_trunc_f32_e64 v0, 1234 -// NOSICI: error: invalid operand for instruction -// NOGFX89: error: invalid operand for instruction +// NOSICI: error: invalid literal operand +// NOGFX89: error: invalid literal operand v_fract_f64_e64 v[0:1], 1234 // SICI: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff] @@ -378,8 +378,8 @@ // GFX89: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x26,0xd2,0x04,0x00,0x00] v_and_b32_e32 v0, 1234, v1 -// NOSICI: error: invalid operand for instruction -// NOGFX89: error: invalid operand for instruction +// NOSICI: error: invalid literal operand +// NOGFX89: error: invalid literal operand v_and_b32_e64 v0, 1234, v1 // SICI: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x04,0x80,0xbe,0xcf,0x2b,0xff,0xff] @@ -450,12 +450,12 @@ // GFX89: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00] v_fract_f64_e64 v[0:1], 0x3fc45f306dc9c882 -// NOSICI: error: invalid operand for instruction +// NOSICI: error: invalid literal operand // GFX89: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00] v_trunc_f32_e64 v0, 0x3e22f983 -// NOSICI: error: invalid operand for instruction -// NOGFX89: error: invalid operand for instruction +// NOSICI: error: invalid literal operand +// NOGFX89: error: invalid literal operand v_fract_f64_e64 v[0:1], 0x3e22f983 // NOSICI: error: invalid operand for instruction @@ -466,7 +466,7 @@ // GFX89: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x26] v_and_b32_e32 v0, 0.159154943091895317852646485335, v1 -// NOSICI: error: invalid operand for instruction +// NOSICI: error: invalid literal operand // GFX89: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00] v_and_b32_e64 v0, 
0.159154943091895317852646485335, v1 Index: test/MC/AMDGPU/literalv216-err.s =================================================================== --- test/MC/AMDGPU/literalv216-err.s +++ test/MC/AMDGPU/literalv216-err.s @@ -1,28 +1,28 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s v_pk_add_f16 v1, -17, v2 -// GFX9: error: invalid operand for instruction +// GFX9: error: invalid literal operand v_pk_add_f16 v1, 65, v2 -// GFX9: error: invalid operand for instruction +// GFX9: error: invalid literal operand v_pk_add_f16 v1, 64.0, v2 -// GFX9: error: invalid operand for instruction +// GFX9: error: invalid literal operand v_pk_add_f16 v1, -0.15915494, v2 -// GFX9: error: invalid operand for instruction +// GFX9: error: invalid literal operand v_pk_add_f16 v1, -0.0, v2 -// GFX9: error: invalid operand for instruction +// GFX9: error: invalid literal operand v_pk_add_f16 v1, -32768, v2 -// GFX9: error: invalid operand for instruction +// GFX9: error: invalid literal operand v_pk_add_f16 v1, 32767, v2 -// GFX9: error: invalid operand for instruction +// GFX9: error: invalid literal operand v_pk_add_f16 v1, 0xffffffffffff000f, v2 -// GFX9: error: invalid operand for instruction +// GFX9: error: invalid literal operand v_pk_add_f16 v1, 0x1000ffff, v2 -// GFX9: error: invalid operand for instruction +// GFX9: error: invalid literal operand Index: test/MC/AMDGPU/literalv216.s =================================================================== --- test/MC/AMDGPU/literalv216.s +++ test/MC/AMDGPU/literalv216.s @@ -1,112 +1,286 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GFX9 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck %s --check-prefix=GFX10 + +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s 
-check-prefix=NOGFX9 +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOGFX10 + +//===----------------------------------------------------------------------===// +// Inline constants +//===----------------------------------------------------------------------===// v_pk_add_f16 v1, 0, v2 // GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18] v_pk_add_f16 v1, 0.0, v2 // GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18] v_pk_add_f16 v1, v2, 0 // GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18] +// GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18] v_pk_add_f16 v1, v2, 0.0 // GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18] +// GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18] v_pk_add_f16 v1, 1.0, v2 // GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18] v_pk_add_f16 v1, -1.0, v2 // GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18] v_pk_add_f16 v1, -0.5, v2 // GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18] v_pk_add_f16 v1, 0.5, v2 // GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18] v_pk_add_f16 v1, 2.0, v2 // GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: 
[0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18] v_pk_add_f16 v1, -2.0, v2 // GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18] v_pk_add_f16 v1, 4.0, v2 // GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18] v_pk_add_f16 v1, -4.0, v2 // GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18] v_pk_add_f16 v1, 0.15915494, v2 // GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18] v_pk_add_f16 v1, -1, v2 // GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18] v_pk_add_f16 v1, -2, v2 // GFX9: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc2,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc2,0x04,0x02,0x18] v_pk_add_f16 v1, -3, v2 // GFX9: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc3,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc3,0x04,0x02,0x18] v_pk_add_f16 v1, -16, v2 // GFX9: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xd0,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xd0,0x04,0x02,0x18] v_pk_add_f16 v1, 1, v2 // GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18] v_pk_add_f16 v1, 2, v2 // 
GFX9: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x82,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x82,0x04,0x02,0x18] v_pk_add_f16 v1, 3, v2 // GFX9: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x83,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x83,0x04,0x02,0x18] v_pk_add_f16 v1, 4, v2 // GFX9: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x84,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x84,0x04,0x02,0x18] v_pk_add_f16 v1, 15, v2 // GFX9: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x8f,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x8f,0x04,0x02,0x18] v_pk_add_f16 v1, 16, v2 // GFX9: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x90,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x90,0x04,0x02,0x18] v_pk_add_f16 v1, 63, v2 // GFX9: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xbf,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xbf,0x04,0x02,0x18] v_pk_add_f16 v1, 64, v2 // GFX9: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc0,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc0,0x04,0x02,0x18] v_pk_add_f16 v1, 0x0001, v2 // GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18] v_pk_add_f16 v1, 0xffff, v2 // GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18] v_pk_add_f16 v1, 0x3c00, v2 // GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18] v_pk_add_f16 v1, 
0xbc00, v2 // GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18] v_pk_add_f16 v1, 0x3800, v2 // GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18] v_pk_add_f16 v1, 0xb800, v2 // GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18] v_pk_add_f16 v1, 0x4000, v2 // GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18] v_pk_add_f16 v1, 0xc000, v2 // GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18] v_pk_add_f16 v1, 0x4400, v2 // GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18] v_pk_add_f16 v1, 0xc400, v2 // GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18] v_pk_add_f16 v1, 0x3118, v2 // GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18] v_pk_add_f16 v1, 65535, v2 // GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18] + +//===----------------------------------------------------------------------===// +// Integer literals 
+//===----------------------------------------------------------------------===// + +v_pk_add_f16 v5, v1, 0x12345678 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_add_f16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] + +v_pk_add_f16 v5, 0x12345678, v2 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_add_f16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] + +v_pk_add_f16 v5, -256, v2 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_add_f16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff] + +v_pk_add_f16 v5, v1, 256 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_add_f16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00] + +v_pk_add_u16 v5, v1, 0x12345678 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_add_u16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] + +v_pk_add_u16 v5, 0x12345678, v2 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_add_u16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] + +v_pk_add_u16 v5, -256, v2 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_add_u16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff] + +v_pk_add_u16 v5, v1, 256 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_add_u16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00] + +v_pk_add_f16 v5, v1, 0x123456780 +// NOGFX9: error: invalid operand for instruction +// NOGFX10: error: invalid operand for instruction + +v_pk_add_u16 v5, v1, 0x123456780 +// NOGFX9: error: invalid operand for instruction +// NOGFX10: error: invalid operand for instruction + +v_pk_fma_f16 v5, 0xaf123456, v2, v3 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_fma_f16 v5, 
0xaf123456, v2, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf] + +v_pk_fma_f16 v5, v1, 0xaf123456, v3 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_fma_f16 v5, v1, 0xaf123456, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf] + +v_pk_fma_f16 v5, v1, v2, 0xaf123456 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_fma_f16 v5, v1, v2, 0xaf123456 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf] + +v_pk_mad_i16 v5, 0xaf123456, v2, v3 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_mad_i16 v5, 0xaf123456, v2, v3 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf] + +v_pk_mad_i16 v5, v1, 0xaf123456, v3 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_mad_i16 v5, v1, 0xaf123456, v3 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf] + +v_pk_mad_i16 v5, v1, v2, 0xaf123456 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_mad_i16 v5, v1, v2, 0xaf123456 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf] + +v_pk_ashrrev_i16 v5, 0x12345678, v2 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_ashrrev_i16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] + +v_pk_ashrrev_i16 v5, v1, 0x12345678 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_ashrrev_i16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] + +//===----------------------------------------------------------------------===// +// Floating-point literals (allowed if lossless conversion to f16 is possible) +//===----------------------------------------------------------------------===// + +v_pk_add_f16 v5, v1, 0.1234 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_add_f16 v5, v1, 0x2fe6 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0xe6,0x2f,0x00,0x00] + +v_pk_add_u16 v5, v1, 0.1234 +// NOGFX9: error: 
invalid literal operand +// GFX10: v_pk_add_u16 v5, v1, 0x2fe6 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0xe6,0x2f,0x00,0x00] + +v_pk_fma_f16 v5, 0.1234, v2, v3 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_fma_f16 v5, 0x2fe6, v2, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0xff,0x04,0x0e,0x1c,0xe6,0x2f,0x00,0x00] + +v_pk_fma_f16 v5, v1, 0.1234, v3 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_fma_f16 v5, v1, 0x2fe6, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0xff,0x0d,0x1c,0xe6,0x2f,0x00,0x00] + +v_pk_fma_f16 v5, v1, v2, 0.1234 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_fma_f16 v5, v1, v2, 0x2fe6 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0x05,0xfe,0x1b,0xe6,0x2f,0x00,0x00] + +v_pk_mad_i16 v5, 0.1234, v2, v3 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_mad_i16 v5, 0x2fe6, v2, v3 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0xe6,0x2f,0x00,0x00] + +v_pk_mad_i16 v5, v1, 0.1234, v3 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_mad_i16 v5, v1, 0x2fe6, v3 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0xe6,0x2f,0x00,0x00] + +v_pk_mad_i16 v5, v1, v2, 0.1234 +// NOGFX9: error: invalid literal operand +// GFX10: v_pk_mad_i16 v5, v1, v2, 0x2fe6 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0xe6,0x2f,0x00,0x00] + +v_pk_add_f16 v5, v1, 123456.0 +// NOGFX9: error: invalid operand for instruction +// NOGFX10: error: invalid operand for instruction + +v_pk_add_u16 v5, v1, 123456.0 +// NOGFX9: error: invalid operand for instruction +// NOGFX10: error: invalid operand for instruction + +//===----------------------------------------------------------------------===// +// Packed VOP2 +//===----------------------------------------------------------------------===// + +// FIXME: v_pk_fmac_f16 cannot be promoted to VOP3 so '_e32' suffix is not valid +v_pk_fmac_f16 v5, 0x12345678, v2 +// NOGFX9: error: instruction not supported on this GPU +// GFX10: v_pk_fmac_f16_e32 v5, 0x12345678, v2 ; encoding: 
[0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12] Index: test/MC/AMDGPU/reloc.s =================================================================== --- test/MC/AMDGPU/reloc.s +++ test/MC/AMDGPU/reloc.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -filetype=obj -triple amdgcn-- -mcpu=kaveri -show-encoding %s | llvm-readobj -r | FileCheck %s +// RUN: llvm-mc -filetype=obj -triple amdgcn-- -mcpu=kaveri -show-encoding %s | llvm-readobj -relocations | FileCheck %s // CHECK: Relocations [ // CHECK: .rel.text { @@ -9,13 +9,6 @@ // CHECK: R_AMDGPU_GOTPCREL32_HI global_var2 0x0 // CHECK: R_AMDGPU_REL32_LO global_var3 0x0 // CHECK: R_AMDGPU_REL32_HI global_var4 0x0 -// CHECK: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD0 0x0 -// CHECK: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD1 0x0 -// CHECK: R_AMDGPU_GOTPCREL global_var0 0x0 -// CHECK: R_AMDGPU_GOTPCREL32_LO global_var1 0x0 -// CHECK: R_AMDGPU_GOTPCREL32_HI global_var2 0x0 -// CHECK: R_AMDGPU_REL32_LO global_var3 0x0 -// CHECK: R_AMDGPU_REL32_HI global_var4 0x0 // CHECK: R_AMDGPU_ABS32 var 0x0 // CHECK: } // CHECK: .rel.data { @@ -33,14 +26,6 @@ s_mov_b32 s5, global_var3@rel32@lo s_mov_b32 s6, global_var4@rel32@hi - v_mov_b32 v0, SCRATCH_RSRC_DWORD0 - v_mov_b32 v1, SCRATCH_RSRC_DWORD1 - v_mov_b32 v2, global_var0@GOTPCREL - v_mov_b32 v3, global_var1@gotpcrel32@lo - v_mov_b32 v4, global_var2@gotpcrel32@hi - v_mov_b32 v5, global_var3@rel32@lo - v_mov_b32 v6, global_var4@rel32@hi - .globl global_var0 .globl global_var1 .globl global_var2 Index: test/MC/AMDGPU/vop2-err.s =================================================================== --- test/MC/AMDGPU/vop2-err.s +++ test/MC/AMDGPU/vop2-err.s @@ -6,7 +6,7 @@ //===----------------------------------------------------------------------===// v_mul_i32_i24 v1, v2, 100 -// CHECK: error: invalid operand for instruction +// CHECK: error: invalid literal operand //===----------------------------------------------------------------------===// // _e32 checks @@ -29,11 +29,11 @@ // Immediate src0 v_mul_i32_i24_e64 v1, 
100, v3 -// CHECK: error: invalid operand for instruction +// CHECK: error: invalid literal operand // Immediate src1 v_mul_i32_i24_e64 v1, v2, 100 -// CHECK: error: invalid operand for instruction +// CHECK: error: invalid literal operand v_add_i32_e32 v1, s[0:1], v2, v3 // CHECK: error: invalid operand for instruction Index: test/MC/AMDGPU/vop3-errs.s =================================================================== --- test/MC/AMDGPU/vop3-errs.s +++ test/MC/AMDGPU/vop3-errs.s @@ -7,7 +7,7 @@ // GCN: error: too few operands for instruction v_div_scale_f32 v24, vcc, v22, 1.1, v22 -// GCN: error: invalid operand for instruction +// GCN: error: invalid literal operand v_mqsad_u32_u8 v[0:3], s[2:3], v4, v[0:3] // GFX67: error: instruction not supported on this GPU Index: test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt @@ -0,0 +1,149 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding %s | FileCheck -check-prefix=GFX10 %s + +#===----------------------------------------------------------------------===// +# Inline constants +#===----------------------------------------------------------------------===// + +# GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18] +0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18 + +# GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: 
[0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc2,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xc2,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc3,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xc3,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xd0,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xd0,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x82,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0x82,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x83,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0x83,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x84,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0x84,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x8f,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0x8f,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, 16, v2 ; encoding: 
[0x01,0x00,0x0f,0xcc,0x90,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0x90,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xbf,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xbf,0x04,0x02,0x18 + +# GFX10: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc0,0x04,0x02,0x18] +0x01,0x00,0x0f,0xcc,0xc0,0x04,0x02,0x18 + +# GFX10: v_pk_fma_f16 v5, 1.0, 2.0, 4.0 ; encoding: [0x05,0x40,0x0e,0xcc,0xf2,0xe8,0xd9,0x1b] +0x05,0x40,0x0e,0xcc,0xf2,0xe8,0xd9,0x1b + +# GFX10: v_pk_fma_f16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b] +0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b + +# GFX10: v_pk_mad_i16 v5, 1.0, 2.0, 4.0 ; encoding: [0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b] +0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b + +# GFX10: v_pk_mad_u16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x09,0xcc,0xc1,0x84,0x0d,0x1b] +0x05,0x40,0x09,0xcc,0xc1,0x84,0x0d,0x1b + +# GFX10: v_pk_ashrrev_i16 v5, 1, 16 ; encoding: [0x05,0x00,0x06,0xcc,0x81,0x20,0x01,0x18] +0x05,0x00,0x06,0xcc,0x81,0x20,0x01,0x18 + +#===----------------------------------------------------------------------===// +# 32-bit literals +#===----------------------------------------------------------------------===// + +# GFX10: v_pk_add_f16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] +0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12 + +# GFX10: v_pk_add_f16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] +0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12 + +# GFX10: v_pk_add_f16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff] +0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff + +# GFX10: v_pk_add_f16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00] +0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00 + +# GFX10: v_pk_add_u16 v5, v1, 0x12345678 ; encoding: 
[0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] +0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12 + +# GFX10: v_pk_add_u16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] +0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12 + +# GFX10: v_pk_add_u16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff] +0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff + +# GFX10: v_pk_add_u16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00] +0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00 + +# GFX10: v_pk_fma_f16 v5, 0xaf123456, v2, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf] +0x05,0x40,0x0e,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf + +# GFX10: v_pk_fma_f16 v5, v1, 0xaf123456, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf] +0x05,0x40,0x0e,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf + +# GFX10: v_pk_fma_f16 v5, v1, v2, 0xaf123456 ; encoding: [0x05,0x40,0x0e,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf] +0x05,0x40,0x0e,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf + +# GFX10: v_pk_mad_i16 v5, 0xaf123456, v2, v3 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf] +0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf + +# GFX10: v_pk_mad_i16 v5, v1, 0xaf123456, v3 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf] +0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0x56,0x34,0x12,0xaf + +# GFX10: v_pk_mad_i16 v5, v1, v2, 0xaf123456 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf] +0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf + +# GFX10: v_pk_ashrrev_i16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] +0x05,0x00,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12 + +# GFX10: v_pk_ashrrev_i16 v5, v1, 0x12345678 ; encoding: 
[0x05,0x00,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] +0x05,0x00,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12 + +#===----------------------------------------------------------------------===// +# Packed VOP2 +#===----------------------------------------------------------------------===// + +# FIXME: v_pk_fmac_f16 cannot be promoted to VOP3 so '_e32' suffix is not valid +# GFX10: v_pk_fmac_f16_e32 v5, 0x12345678, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12] +0xff,0x04,0x0a,0x78,0x78,0x56,0x34,0x12 Index: test/MC/Disassembler/AMDGPU/vop3-literal.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AMDGPU/vop3-literal.txt @@ -0,0 +1,49 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX10 %s + +# GFX10: v_bfe_u32 v0, 0x3039, v1, s1 ; encoding: [0x00,0x00,0x48,0xd5,0xff,0x02,0x06,0x00,0x39,0x30,0x00,0x00] +0x00,0x00,0x48,0xd5,0xff,0x02,0x06,0x00,0x39,0x30,0x00,0x00 + +# GFX10: v_bfe_u32 v0, v1, 0x3039, s1 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0xff,0x05,0x00,0x39,0x30,0x00,0x00] +0x00,0x00,0x48,0xd5,0x01,0xff,0x05,0x00,0x39,0x30,0x00,0x00 + +# GFX10: v_bfe_u32 v0, v1, s1, 0x3039 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0x03,0xfc,0x03,0x39,0x30,0x00,0x00] +0x00,0x00,0x48,0xd5,0x01,0x03,0xfc,0x03,0x39,0x30,0x00,0x00 + +# GFX10: v_bfe_u32 v0, 0x3039, v1, v2 ; encoding: [0x00,0x00,0x48,0xd5,0xff,0x02,0x0a,0x04,0x39,0x30,0x00,0x00] +0x00,0x00,0x48,0xd5,0xff,0x02,0x0a,0x04,0x39,0x30,0x00,0x00 + +# GFX10: v_bfe_u32 v0, s1, 0x3039, s1 ; encoding: [0x00,0x00,0x48,0xd5,0x01,0xfe,0x05,0x00,0x39,0x30,0x00,0x00] +0x00,0x00,0x48,0xd5,0x01,0xfe,0x05,0x00,0x39,0x30,0x00,0x00 + +# GFX10: v_bfm_b32_e64 v0, 0x3039, s1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x00,0x00,0x39,0x30,0x00,0x00] +0x00,0x00,0x63,0xd7,0xff,0x02,0x00,0x00,0x39,0x30,0x00,0x00 + +# GFX10: v_bfm_b32_e64 v0, 0x3039, v1 ; encoding: 
[0x00,0x00,0x63,0xd7,0xff,0x02,0x02,0x00,0x39,0x30,0x00,0x00] +0x00,0x00,0x63,0xd7,0xff,0x02,0x02,0x00,0x39,0x30,0x00,0x00 + +# GFX10: v_pk_add_f16 v1, 0x4e40, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0x4e,0x00,0x00] +0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0x4e,0x00,0x00 + +# GFX10: v_pk_add_f16 v1, 0x1e240, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0xe2,0x01,0x00] +0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0xe2,0x01,0x00 + +# GFX10: v_pk_add_f16 v1, 0xffffff38, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff] +0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff + +# GFX10: v_pk_add_u16 v1, 0xffffff38, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff] +0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff + +# GFX10: v_pk_add_u16 v1, 64, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc0,0x04,0x02,0x18] +0x01,0x00,0x0a,0xcc,0xc0,0x04,0x02,0x18 + +# GFX10: v_pk_add_u16 v1, 0x41, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x41,0x00,0x00,0x00] +0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x41,0x00,0x00,0x00 + +# GFX10: v_pk_add_u16 v1, -1, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc1,0x04,0x02,0x18] +0x01,0x00,0x0a,0xcc,0xc1,0x04,0x02,0x18 + +# GFX10: v_pk_add_u16 v1, -5, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc5,0x04,0x02,0x18] +0x01,0x00,0x0a,0xcc,0xc5,0x04,0x02,0x18 + +# GFX10: v_pk_add_u16 v1, 0xffffff9c, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff] +0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff