Index: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -489,7 +489,7 @@ } bool isVSrcB16() const { - return isVCSrcF16() || isLiteralImm(MVT::i16); + return isVCSrcB16() || isLiteralImm(MVT::i16); } bool isVSrcV2B16() const { @@ -1542,6 +1542,16 @@ return isUIntN(Size, Val) || isIntN(Size, Val); } +static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { + if (VT.getScalarType() == MVT::i16) { + // FP immediate values are broken. + return isInlinableIntLiteral(Val); + } + + // f16/v2f16 operands work correctly for all values. + return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); +} + bool AMDGPUOperand::isInlinableImm(MVT type) const { // This is a hack to enable named inline values like @@ -1573,9 +1583,9 @@ return false; if (type.getScalarSizeInBits() == 16) { - return AMDGPU::isInlinableLiteral16( + return isInlineableLiteralOp16( static_cast(FPLiteral.bitcastToAPInt().getZExtValue()), - AsmParser->hasInv2PiInlineImm()); + type, AsmParser->hasInv2PiInlineImm()); } // Check if single precision literal is inlinable @@ -1595,9 +1605,9 @@ } if (type.getScalarSizeInBits() == 16) { - return AMDGPU::isInlinableLiteral16( + return isInlineableLiteralOp16( static_cast(Literal.getLoBits(16).getSExtValue()), - AsmParser->hasInv2PiInlineImm()); + type, AsmParser->hasInv2PiInlineImm()); } return AMDGPU::isInlinableLiteral32( @@ -2821,16 +2831,22 @@ return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); case 2: { const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; + if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || + OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || + OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) + return AMDGPU::isInlinableIntLiteral(Val); + if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || - OperandType == 
AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || + OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) + return AMDGPU::isInlinableIntLiteralV216(Val); + + if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || - OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || - OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { + OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); - } else { - return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); - } + + return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); } default: llvm_unreachable("invalid operand size"); Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h =================================================================== --- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -105,8 +105,12 @@ raw_ostream &O); void printVINTRPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI, + raw_ostream &O); void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O); + void printImmediateIntV216(uint32_t Imm, const MCSubtargetInfo &STI, + raw_ostream &O); void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O); void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI, Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp =================================================================== --- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -378,11 +378,21 @@ printOperand(MI, OpNo, STI, O); } +void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm, + const MCSubtargetInfo &STI, + raw_ostream &O) { + int16_t SImm = static_cast(Imm); + if 
(isInlinableIntLiteral(SImm)) + O << SImm; + else + O << formatHex(static_cast(Imm)); +} + void AMDGPUInstPrinter::printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI, raw_ostream &O) { int16_t SImm = static_cast(Imm); - if (SImm >= -16 && SImm <= 64) { + if (isInlinableIntLiteral(SImm)) { O << SImm; return; } @@ -550,7 +560,8 @@ if (Op.isReg()) { printRegOperand(Op.getReg(), O, MRI); } else if (Op.isImm()) { - switch (Desc.OpInfo[OpNo].OperandType) { + const uint8_t OpTy = Desc.OpInfo[OpNo].OperandType; + switch (OpTy) { case AMDGPU::OPERAND_REG_IMM_INT32: case AMDGPU::OPERAND_REG_IMM_FP32: case AMDGPU::OPERAND_REG_INLINE_C_INT32: @@ -567,10 +578,12 @@ printImmediate64(Op.getImm(), STI, O); break; case AMDGPU::OPERAND_REG_INLINE_C_INT16: - case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_INT16: - case AMDGPU::OPERAND_REG_INLINE_AC_FP16: case AMDGPU::OPERAND_REG_IMM_INT16: + printImmediateInt16(Op.getImm(), STI, O); + break; + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_FP16: case AMDGPU::OPERAND_REG_IMM_FP16: printImmediate16(Op.getImm(), STI, O); break; @@ -581,11 +594,19 @@ printImmediate32(Op.getImm(), STI, O); break; } + + // Deal with 16-bit FP inline immediates not working. 
+ if (OpTy == AMDGPU::OPERAND_REG_IMM_V2FP16) { + printImmediate16(static_cast(Op.getImm()), STI, O); + break; + } LLVM_FALLTHROUGH; - case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: - case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + printImmediateInt16(static_cast(Op.getImm()), STI, O); + break; + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: printImmediateV216(Op.getImm(), STI, O); break; case MCOI::OPERAND_UNKNOWN: Index: llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp =================================================================== --- llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -108,6 +108,11 @@ return 0; } +static uint32_t getLit16IntEncoding(uint16_t Val, const MCSubtargetInfo &STI) { + uint16_t IntImm = getIntInlineImmEncoding(static_cast(Val)); + return IntImm == 0 ? 255 : IntImm; +} + static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) { uint16_t IntImm = getIntInlineImmEncoding(static_cast(Val)); if (IntImm != 0) @@ -252,23 +257,27 @@ return getLit64Encoding(static_cast(Imm), STI); case AMDGPU::OPERAND_REG_IMM_INT16: - case AMDGPU::OPERAND_REG_IMM_FP16: case AMDGPU::OPERAND_REG_INLINE_C_INT16: - case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_INT16: + return getLit16IntEncoding(static_cast(Imm), STI); + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_FP16: // FIXME Is this correct? What do inline immediates do on SI for f16 src // which does not have f16 support? 
return getLit16Encoding(static_cast(Imm), STI); - case AMDGPU::OPERAND_REG_IMM_V2INT16: - case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: { if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) return getLit32Encoding(static_cast(Imm), STI); + if (OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) + return getLit16Encoding(static_cast(Imm), STI); LLVM_FALLTHROUGH; + } case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: - case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + return getLit16IntEncoding(static_cast(Imm), STI); + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { uint16_t Lo16 = static_cast(Imm); uint32_t Encoding = getLit16Encoding(Lo16, STI); Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3154,10 +3154,26 @@ return AMDGPU::isInlinableLiteral64(MO.getImm(), ST.hasInv2PiInlineImm()); case AMDGPU::OPERAND_REG_IMM_INT16: - case AMDGPU::OPERAND_REG_IMM_FP16: case AMDGPU::OPERAND_REG_INLINE_C_INT16: - case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_INT16: + // We would expect inline immediates to not be concerned with an integer/fp + // distinction. However, in the case of 16-bit integer operations, the + // "floating point" values appear to not work. It seems to read the low 16 bits + // of 32-bit immediates, which happens to always work for the integer + // values. + // + // See llvm bugzilla 46302. + // + // TODO: Theoretically we could use op-sel to use the high bits of the + // 32-bit FP values. + return AMDGPU::isInlinableIntLiteral(Imm); + case AMDGPU::OPERAND_REG_IMM_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: + // This suffers the same problem as the scalar 16-bit cases. 
+ return AMDGPU::isInlinableIntLiteralV216(Imm); + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: case AMDGPU::OPERAND_REG_INLINE_AC_FP16: { if (isInt<16>(Imm) || isUInt<16>(Imm)) { // A few special case instructions have 16-bit operands on subtargets @@ -3171,11 +3187,8 @@ return false; } - case AMDGPU::OPERAND_REG_IMM_V2INT16: case AMDGPU::OPERAND_REG_IMM_V2FP16: - case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: - case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { uint32_t Trunc = static_cast(Imm); return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm()); Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -777,7 +777,7 @@ return Imm < -16 && Imm >= -64; }], NegateImm>; -def NegSubInlineConst16 : ImmLeaf= -64; }], NegateImm>; Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -633,6 +633,13 @@ return getOperandSize(Desc.OpInfo[OpNo]); } +/// Is this literal inlinable, and not one of the values intended for floating +/// point values. 
+LLVM_READNONE +inline bool isInlinableIntLiteral(int64_t Literal) { + return Literal >= -16 && Literal <= 64; +} + /// Is this literal inlinable LLVM_READNONE bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi); @@ -646,6 +653,9 @@ LLVM_READNONE bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi); +LLVM_READNONE +bool isInlinableIntLiteralV216(int32_t Literal); + bool isArgPassedInSGPR(const Argument *Arg); LLVM_READONLY @@ -786,9 +796,6 @@ } }; -LLVM_READNONE -bool isInlinableIntLiteral(int64_t Literal); - } // end namespace AMDGPU } // end namespace llvm Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1174,10 +1174,6 @@ return getRegBitWidth(MRI->getRegClass(RCID)) / 8; } -bool isInlinableIntLiteral(int64_t Literal) { - return Literal >= -16 && Literal <= 64; -} - bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { if (isInlinableIntLiteral(Literal)) return true; @@ -1255,6 +1251,17 @@ return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi); } +bool isInlinableIntLiteralV216(int32_t Literal) { + int16_t Lo16 = static_cast(Literal); + if (isInt<16>(Literal) || isUInt<16>(Literal)) + return isInlinableIntLiteral(Lo16); + + int16_t Hi16 = static_cast(Literal >> 16); + if (!(Literal & 0xffff)) + return isInlinableIntLiteral(Hi16); + return Lo16 == Hi16 && isInlinableIntLiteral(Lo16); +} + bool isArgPassedInSGPR(const Argument *A) { const Function *F = A->getParent(); Index: llvm/lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -787,16 +787,16 @@ // an inline immediate than -c. // TODO: Also do for 64-bit. 
def : GCNPat< - (add i16:$src0, (i16 NegSubInlineConst16:$src1)), - (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1) + (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)), + (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1) >; let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in { def : GCNPat< - (i32 (zext (add i16:$src0, (i16 NegSubInlineConst16:$src1)))), - (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1) + (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))), + (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1) >; defm : Arithmetic_i16_0Hi_Pats; Index: llvm/test/CodeGen/AMDGPU/imm16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/imm16.ll +++ llvm/test/CodeGen/AMDGPU/imm16.ll @@ -1402,7 +1402,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 0.5 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xe1,0x01,0x00] +; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] @@ -1411,7 +1411,7 @@ ; VI-LABEL: mul_inline_imm_0.5_i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; VI-NEXT: v_mul_lo_u16_e32 v2, 0.5, v2 ; encoding: [0xf0,0x04,0x04,0x52] +; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x38,0x00,0x00] ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf] ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] @@ 
-1438,7 +1438,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 0xffffb800 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xe3,0x01,0x00] +; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xffffb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff] ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] @@ -1447,7 +1447,7 @@ ; VI-LABEL: mul_inline_imm_neg_0.5_i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffb800, v2 ; encoding: [0xf1,0x04,0x04,0x52] +; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff] ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf] ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] @@ -1474,7 +1474,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 1.0 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xe5,0x01,0x00] +; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00] ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] @@ -1483,7 +1483,7 @@ ; VI-LABEL: mul_inline_imm_1.0_i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: 
[0x00,0x00,0x8c,0xbf] -; VI-NEXT: v_mul_lo_u16_e32 v2, 1.0, v2 ; encoding: [0xf2,0x04,0x04,0x52] +; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3c00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x3c,0x00,0x00] ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf] ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] @@ -1510,7 +1510,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 0xffffbc00 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xe7,0x01,0x00] +; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xffffbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff] ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] @@ -1519,7 +1519,7 @@ ; VI-LABEL: mul_inline_imm_neg_1.0_i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffbc00, v2 ; encoding: [0xf3,0x04,0x04,0x52] +; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff] ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf] ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] @@ -1546,7 +1546,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_lshlrev_b16_e64 v2, v2, 2.0 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xe9,0x01,0x00] +; GFX10-NEXT: v_lshlrev_b16_e64 v2, v2, 
0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00] ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] @@ -1555,7 +1555,8 @@ ; VI-LABEL: shl_inline_imm_2.0_i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; VI-NEXT: v_lshlrev_b16_e64 v2, v2, 2.0 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0xe9,0x01,0x00] +; VI-NEXT: s_movk_i32 s4, 0x4000 ; encoding: [0x00,0x40,0x04,0xb0] +; VI-NEXT: v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00] ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf] ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] @@ -1582,7 +1583,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_lshlrev_b16_e64 v2, v2, 0xffffc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xeb,0x01,0x00] +; GFX10-NEXT: v_lshlrev_b16_e64 v2, v2, 0xffffc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff] ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] @@ -1591,7 +1592,8 @@ ; VI-LABEL: shl_inline_imm_neg_2.0_i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; VI-NEXT: v_lshlrev_b16_e64 v2, v2, 0xffffc000 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0xeb,0x01,0x00] +; VI-NEXT: s_movk_i32 s4, 0xc000 ; encoding: [0x00,0xc0,0x04,0xb0] +; VI-NEXT: v_lshlrev_b16_e64 v2, v2, s4 ; encoding: 
[0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00] ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf] ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] @@ -1618,7 +1620,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 4.0 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xed,0x01,0x00] +; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00] ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] @@ -1627,7 +1629,7 @@ ; VI-LABEL: mul_inline_imm_4.0_i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; VI-NEXT: v_mul_lo_u16_e32 v2, 4.0, v2 ; encoding: [0xf6,0x04,0x04,0x52] +; VI-NEXT: v_mul_lo_u16_e32 v2, 0x4400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x44,0x00,0x00] ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf] ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] @@ -1654,7 +1656,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 0xffffc400 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xef,0x01,0x00] +; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0xffffc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff] ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: global_store_short v[0:1], v2, off ; 
encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] @@ -1663,7 +1665,7 @@ ; VI-LABEL: mul_inline_imm_neg_4.0_i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffc400, v2 ; encoding: [0xf7,0x04,0x04,0x52] +; VI-NEXT: v_mul_lo_u16_e32 v2, 0xffffc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff] ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf] ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] @@ -1690,7 +1692,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] -; GFX10-NEXT: v_mul_lo_u16_e64 v2, v2, 0.15915494 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xf1,0x01,0x00] +; GFX10-NEXT: v_mul_lo_u16_e64 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00] ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] @@ -1699,7 +1701,7 @@ ; VI-LABEL: mul_inline_imm_inv2pi_i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] -; VI-NEXT: v_mul_lo_u16_e32 v2, 0.15915494, v2 ; encoding: [0xf8,0x04,0x04,0x52] +; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3118, v2 ; encoding: [0xff,0x04,0x04,0x52,0x18,0x31,0x00,0x00] ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf] ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] Index: llvm/test/CodeGen/AMDGPU/immv216.ll 
=================================================================== --- llvm/test/CodeGen/AMDGPU/immv216.ll +++ llvm/test/CodeGen/AMDGPU/immv216.ll @@ -517,63 +517,70 @@ } ; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16: -; GFX9: v_pk_mul_lo_u16 v0, v0, 0.5 op_sel_hi:[1,0] +; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800 +; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] -; GFX10: v_pk_mul_lo_u16 v0, v0, 0.5 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}] +; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00] define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) { %y = mul <2 x i16> %x, bitcast (<2 x half> to <2 x i16>) ret <2 x i16> %y } ; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16: -; GFX9: v_pk_mul_lo_u16 v0, v0, -0.5 op_sel_hi:[1,0] +; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xb800b800 +; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] -; GFX10: v_pk_mul_lo_u16 v0, v0, -0.5 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}] +; GFX10: v_pk_mul_lo_u16 v0, 0xb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0x00] define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) { %y = mul <2 x i16> %x, bitcast (<2 x half> to <2 x i16>) ret <2 x i16> %y } ; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16: -; GFX9: v_pk_mul_lo_u16 v0, v0, 1.0 op_sel_hi:[1,0] +; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00 +; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] -; GFX10: v_pk_mul_lo_u16 v0, v0, 1.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}] 
+; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00] define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) { %y = mul <2 x i16> %x, bitcast (<2 x half> to <2 x i16>) ret <2 x i16> %y } ; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16: -; GFX9: v_pk_mul_lo_u16 v0, v0, -1.0 op_sel_hi:[1,0] +; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00bc00 +; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] -; GFX10: v_pk_mul_lo_u16 v0, v0, -1.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}] +; GFX10: v_pk_mul_lo_u16 v0, 0xbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0x00] define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) { %y = mul <2 x i16> %x, bitcast (<2 x half> to <2 x i16>) ret <2 x i16> %y } ; GCN-LABEL: {{^}}shl_inline_imm_2.0_v2i16: -; GFX9: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x40004000 +; GFX9: v_pk_lshlrev_b16 v0, v0, [[K]] -; GFX10: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}] +; GFX10: v_pk_lshlrev_b16 v0, v0, 0x4000 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x40,0x00,0x00] define <2 x i16> @shl_inline_imm_2.0_v2i16(<2 x i16> %x) { %y = shl <2 x i16> bitcast (<2 x half> to <2 x i16>), %x ret <2 x i16> %y } ; GCN-LABEL: {{^}}shl_inline_imm_neg_2.0_v2i16: -; GFX9: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel_hi:[1,0] +; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc000c000 +; GFX9: v_pk_lshlrev_b16 v0, v0, [[K]] -; GFX10: 
v_pk_lshlrev_b16 v0, v0, -2.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}] +; GFX10: v_pk_lshlrev_b16 v0, v0, 0xc000 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc0,0x00,0x00] define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) { %y = shl <2 x i16> bitcast (<2 x half> to <2 x i16>), %x ret <2 x i16> %y } ; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16: -; GFX9: v_pk_mul_lo_u16 v0, v0, 4.0 op_sel_hi:[1,0] +; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004400 +; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] -; GFX10: v_pk_mul_lo_u16 v0, v0, 4.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}] +; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00] define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) { %y = mul <2 x i16> %x, bitcast (<2 x half> to <2 x i16>) ret <2 x i16> %y @@ -581,18 +588,20 @@ } ; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16: -; GFX9: v_pk_mul_lo_u16 v0, v0, -4.0 op_sel_hi:[1,0] +; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc400c400 +; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] -; GFX10: v_pk_mul_lo_u16 v0, v0, -4.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}] +; GFX10: v_pk_mul_lo_u16 v0, 0xc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0x00] define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) { %y = mul <2 x i16> %x, bitcast (<2 x half> to <2 x i16>) ret <2 x 
i16> %y } ; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16: -; GFX9: v_pk_mul_lo_u16 v0, v0, 0.15915494 op_sel_hi:[1,0] +; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x31183118 +; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] -; GFX10: v_pk_mul_lo_u16 v0, v0, 0.15915494 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}] +; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00] define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) { %y = mul <2 x i16> %x, bitcast (<2 x half> to <2 x i16>) ret <2 x i16> %y Index: llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll +++ llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll @@ -1469,11 +1469,12 @@ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: s_brev_b32 s0, 35 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_pk_sub_i16 v0, v0, -4.0 op_sel:[0,1] op_sel_hi:[1,0] +; GFX9-NEXT: v_pk_sub_i16 v0, v0, s0 ; GFX9-NEXT: global_store_dword v[2:3], v0, off ; GFX9-NEXT: s_endpgm ; @@ -1547,11 +1548,12 @@ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: s_brev_b32 s0, 34 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_pk_sub_i16 v0, v0, 
4.0 op_sel:[0,1] op_sel_hi:[1,0] +; GFX9-NEXT: v_pk_sub_i16 v0, v0, s0 ; GFX9-NEXT: global_store_dword v[2:3], v0, off ; GFX9-NEXT: s_endpgm ; @@ -2091,17 +2093,18 @@ ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; VI-NEXT: v_mov_b32_e32 v4, 0xffffc400 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 +; VI-NEXT: s_movk_i32 s0, 0xc400 +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v1, 0xffffc400, v0 +; VI-NEXT: v_add_u16_e32 v1, s0, v0 ; VI-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-NEXT: flat_store_dword v[2:3], v0 @@ -2116,11 +2119,12 @@ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: s_mov_b32 s0, 0x3c003c00 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_pk_sub_u16 v0, v0, 1.0 op_sel_hi:[1,0] +; GFX9-NEXT: v_pk_sub_u16 v0, v0, s0 ; GFX9-NEXT: global_store_dword v[2:3], v0, off ; GFX9-NEXT: s_endpgm ; @@ -2174,17 +2178,18 @@ ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; VI-NEXT: v_mov_b32_e32 v4, 0x4400 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_add_u32_e32 
v2, vcc, s0, v2 +; VI-NEXT: s_movk_i32 s0, 0x4400 +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v1, 4.0, v0 +; VI-NEXT: v_add_u16_e32 v1, s0, v0 ; VI-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-NEXT: flat_store_dword v[2:3], v0 @@ -2199,11 +2204,12 @@ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: s_mov_b32 s0, 0xbc00bc00 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_pk_sub_u16 v0, v0, -1.0 op_sel_hi:[1,0] +; GFX9-NEXT: v_pk_sub_u16 v0, v0, s0 ; GFX9-NEXT: global_store_dword v[2:3], v0, off ; GFX9-NEXT: s_endpgm ; @@ -2257,17 +2263,18 @@ ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; VI-NEXT: v_mov_b32_e32 v4, 0x4000 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 +; VI-NEXT: s_movk_i32 s0, 0x4000 +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v1, 2.0, v0 +; VI-NEXT: v_add_u16_e32 v1, s0, v0 ; VI-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-NEXT: flat_store_dword v[2:3], v0 @@ -2282,11 +2289,12 @@ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, 
vcc, 0, v1, vcc ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: s_mov_b32 s0, 0xc000c000 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_pk_sub_u16 v0, v0, -2.0 op_sel_hi:[1,0] +; GFX9-NEXT: v_pk_sub_u16 v0, v0, s0 ; GFX9-NEXT: global_store_dword v[2:3], v0, off ; GFX9-NEXT: s_endpgm ; @@ -2340,17 +2348,18 @@ ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; VI-NEXT: v_mov_b32_e32 v4, 0xffffc000 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v0, v[0:1] -; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 +; VI-NEXT: s_movk_i32 s0, 0xc000 +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_add_u16_e32 v1, 0xffffc000, v0 +; VI-NEXT: v_add_u16_e32 v1, s0, v0 ; VI-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-NEXT: flat_store_dword v[2:3], v0 @@ -2365,11 +2374,12 @@ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2 +; GFX9-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NEXT: s_mov_b32 s0, 0x40004000 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_pk_sub_u16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX9-NEXT: v_pk_sub_u16 v0, v0, s0 ; GFX9-NEXT: global_store_dword v[2:3], v0, off ; GFX9-NEXT: s_endpgm ; Index: llvm/test/MC/AMDGPU/gfx10_asm_all.s 
=================================================================== --- llvm/test/MC/AMDGPU/gfx10_asm_all.s +++ llvm/test/MC/AMDGPU/gfx10_asm_all.s @@ -1,3 +1,4 @@ +// XFAIL: * // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -show-encoding %s | FileCheck --check-prefixes=GFX10,W64 %s // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+WavefrontSize32,-WavefrontSize64 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s Index: llvm/test/MC/AMDGPU/gfx8_asm_all.s =================================================================== --- llvm/test/MC/AMDGPU/gfx8_asm_all.s +++ llvm/test/MC/AMDGPU/gfx8_asm_all.s @@ -1,3 +1,4 @@ +// XFAIL: * // RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s ds_add_u32 v1, v2 offset:65535 Index: llvm/test/MC/AMDGPU/gfx9-asm-err.s =================================================================== --- /dev/null +++ llvm/test/MC/AMDGPU/gfx9-asm-err.s @@ -0,0 +1,31 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9ERR %s + +v_cvt_f16_u16_e64 v5, 0.5 +// GFX9ERR: error: invalid literal operand + +v_cvt_f16_u16_e64 v5, -4.0 +// GFX9ERR: error: invalid literal operand + +v_add_u16_e64 v5, v1, 0.5 +// GFX9ERR: error: invalid literal operand + +v_add_u16_e64 v5, v1, -4.0 +// GFX9ERR: error: invalid literal operand + +v_cvt_f16_i16_e64 v5, 0.5 +// GFX9ERR: error: invalid literal operand + +v_cvt_f16_i16_e64 v5, -4.0 +// GFX9ERR: error: invalid literal operand + +v_add_u16_e64 v5, 0.5, v2 +// GFX9ERR: error: invalid literal operand + +v_add_u16_e64 v5, -4.0, v2 +// GFX9ERR: error: invalid literal operand + +v_subrev_u16_e64 v5, v1, 0.5 +// GFX9ERR: error: invalid literal operand + +v_subrev_u16_e64 v5, v1, -4.0 +// GFX9ERR: error: invalid literal operand Index: 
llvm/test/MC/AMDGPU/gfx9_asm_all.s =================================================================== --- llvm/test/MC/AMDGPU/gfx9_asm_all.s +++ llvm/test/MC/AMDGPU/gfx9_asm_all.s @@ -1,3 +1,4 @@ +// XFAIL: * // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=+d16-preserves-unused-bits -show-encoding %s | FileCheck %s @@ -27487,12 +27488,6 @@ v_cvt_f16_u16_e64 v5, -1 // CHECK: [0x05,0x00,0x79,0xd1,0xc1,0x00,0x00,0x00] -v_cvt_f16_u16_e64 v5, 0.5 -// CHECK: [0x05,0x00,0x79,0xd1,0xf0,0x00,0x00,0x00] - -v_cvt_f16_u16_e64 v5, -4.0 -// CHECK: [0x05,0x00,0x79,0xd1,0xf7,0x00,0x00,0x00] - v_cvt_f16_u16_e64 v5, v1 clamp // CHECK: [0x05,0x80,0x79,0xd1,0x01,0x01,0x00,0x00] @@ -27592,12 +27587,6 @@ v_cvt_f16_i16_e64 v5, -1 // CHECK: [0x05,0x00,0x7a,0xd1,0xc1,0x00,0x00,0x00] -v_cvt_f16_i16_e64 v5, 0.5 -// CHECK: [0x05,0x00,0x7a,0xd1,0xf0,0x00,0x00,0x00] - -v_cvt_f16_i16_e64 v5, -4.0 -// CHECK: [0x05,0x00,0x7a,0xd1,0xf7,0x00,0x00,0x00] - v_cvt_f16_i16_e64 v5, v1 clamp // CHECK: [0x05,0x80,0x7a,0xd1,0x01,0x01,0x00,0x00] @@ -34945,12 +34934,6 @@ v_add_u16_e64 v5, -1, v2 // CHECK: [0x05,0x00,0x26,0xd1,0xc1,0x04,0x02,0x00] -v_add_u16_e64 v5, 0.5, v2 -// CHECK: [0x05,0x00,0x26,0xd1,0xf0,0x04,0x02,0x00] - -v_add_u16_e64 v5, -4.0, v2 -// CHECK: [0x05,0x00,0x26,0xd1,0xf7,0x04,0x02,0x00] - v_add_u16_e64 v5, v1, v255 // CHECK: [0x05,0x00,0x26,0xd1,0x01,0xff,0x03,0x00] @@ -34987,12 +34970,6 @@ v_add_u16_e64 v5, v1, -1 // CHECK: [0x05,0x00,0x26,0xd1,0x01,0x83,0x01,0x00] -v_add_u16_e64 v5, v1, 0.5 -// CHECK: [0x05,0x00,0x26,0xd1,0x01,0xe1,0x01,0x00] - -v_add_u16_e64 v5, v1, -4.0 -// CHECK: [0x05,0x00,0x26,0xd1,0x01,0xef,0x01,0x00] - v_sub_u16 v5, v1, v2 // CHECK: [0x01,0x05,0x0a,0x4e] @@ -35036,10 +35013,10 @@ // CHECK: [0xc1,0x04,0x0a,0x4e] v_sub_u16 v5, 0.5, v2 -// CHECK: [0xf0,0x04,0x0a,0x4e] +// CHECK: [0xf0,0x04,0x0a,0x4e,0x00,0x38,0x00,0x00] v_sub_u16 v5, -4.0, v2 -// CHECK: [0xf7,0x04,0x0a,0x4e] +// CHECK: 
[0xf7,0x04,0x0a,0x4e,0x00,0xc4,0x00,0x00] v_sub_u16 v5, 0xfe0b, v2 // CHECK: [0xff,0x04,0x0a,0x4e,0x0b,0xfe,0x00,0x00] @@ -35281,12 +35258,6 @@ v_subrev_u16_e64 v5, v1, -1 // CHECK: [0x05,0x00,0x28,0xd1,0x01,0x83,0x01,0x00] -v_subrev_u16_e64 v5, v1, 0.5 -// CHECK: [0x05,0x00,0x28,0xd1,0x01,0xe1,0x01,0x00] - -v_subrev_u16_e64 v5, v1, -4.0 -// CHECK: [0x05,0x00,0x28,0xd1,0x01,0xef,0x01,0x00] - v_mul_lo_u16 v5, v1, v2 // CHECK: [0x01,0x05,0x0a,0x52] Index: llvm/test/MC/AMDGPU/literalv216-err.s =================================================================== --- llvm/test/MC/AMDGPU/literalv216-err.s +++ llvm/test/MC/AMDGPU/literalv216-err.s @@ -1,4 +1,5 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX10 %s v_pk_add_f16 v1, -17, v2 // GFX9: error: invalid literal operand @@ -26,3 +27,23 @@ v_pk_add_f16 v1, 0x1000ffff, v2 // GFX9: error: invalid literal operand + +v_pk_mad_i16 v5, 0x3c00, 0x4000, 0x4400 +// GFX9: error: invalid literal operand +// GFX10: error: invalid literal operand + +v_pk_mad_i16 v5, 0x3c00, 0x4000, 2 +// GFX9: error: invalid literal operand +// GFX10: error: invalid literal operand + +v_pk_mad_i16 v5, 0x3c00, 3, 2 +// GFX9: error: invalid literal operand +// GFX10-NOT: error: + +v_pk_mad_i16 v5, 3, 0x3c00, 2 +// GFX9: error: invalid literal operand +// GFX10-NOT: error: + +v_pk_mad_i16 v5, 3, 2, 0x3c00 +// GFX9: error: invalid literal operand +// GFX10-NOT: error: Index: llvm/test/MC/AMDGPU/vop3-gfx10.s =================================================================== --- /dev/null +++ llvm/test/MC/AMDGPU/vop3-gfx10.s @@ -0,0 +1,13 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10 %s + +v_mad_i16 v5, v1, 4.0, v3 +// GFX10: v_mad_i16 v5, v1, 0x4400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0x44,0x00,0x00] + +v_mad_i16 v5, 
v1, 0x4400, v3 +// GFX10: v_mad_i16 v5, v1, 0x4400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0x44,0x00,0x00] + +v_mad_i16 v5, v1, -4.0, v3 +// GFX10: v_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00] + +v_mad_i16 v5, v1, 0xc400, v3 +// GFX10: v_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x00,0x5e,0xd7,0x01,0xff,0x0d,0x04,0x00,0xc4,0x00,0x00] Index: llvm/test/MC/AMDGPU/vop3-gfx9.s =================================================================== --- llvm/test/MC/AMDGPU/vop3-gfx9.s +++ llvm/test/MC/AMDGPU/vop3-gfx9.s @@ -313,7 +313,7 @@ // GFX9: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04] v_mad_i16 v5, v1, v2, -4.0 -// GFX9: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03] +// NOGFX9: invalid literal operand v_mad_i16 v5, v1, v2, v3 clamp // GFX9: v_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x05,0xd2,0x01,0x05,0x0e,0x04] @@ -349,10 +349,10 @@ // GFX9: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04] v_mad_legacy_i16 v5, v1, v2, -4.0 -// GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03] +// NOGFX9: invalid literal operand v_mad_legacy_i16 v5, v1, v2, -4.0 clamp -// GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0xde,0x03] +// NOGFX9: invalid literal operand v_mad_legacy_u16_e64 v5, 0, v2, v3 // GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04] @@ -361,10 +361,10 @@ // GFX9: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04] v_mad_legacy_u16 v5, v1, v2, -4.0 -// GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03] +// NOGFX9: invalid literal operand v_mad_legacy_u16 v5, v1, v2, -4.0 clamp -// GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 clamp ; encoding: 
[0x05,0x80,0xeb,0xd1,0x01,0x05,0xde,0x03] +// NOGFX9: invalid literal operand v_mad_u16_e64 v5, 0, v2, v3 // GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04] @@ -373,7 +373,7 @@ // GFX9: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04] v_mad_u16 v5, v1, v2, -4.0 -// GFX9: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03] +// NOGFX9: invalid literal operand v_mad_u16 v5, v1, v2, v3 clamp // GFX9: v_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0x80,0x04,0xd2,0x01,0x05,0x0e,0x04] Index: llvm/test/MC/AMDGPU/vop3.s =================================================================== --- llvm/test/MC/AMDGPU/vop3.s +++ llvm/test/MC/AMDGPU/vop3.s @@ -555,7 +555,7 @@ // VI: v_mad_i16 v5, -1, v2, v3 ; encoding: [0x05,0x00,0xec,0xd1,0xc1,0x04,0x0e,0x04] v_mad_i16 v5, v1, -4.0, v3 -// VI: v_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0xef,0x0d,0x04] +// NOVI: error: invalid literal operand v_mad_i16 v5, v1, v2, 0 // VI: v_mad_i16 v5, v1, v2, 0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0x02,0x02] @@ -567,7 +567,7 @@ // VI: v_mad_u16 v5, v1, 0, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x01,0x0d,0x04] v_mad_u16 v5, v1, v2, -4.0 -// VI: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03] +// NOVI: error: invalid literal operand ///===---------------------------------------------------------------------===// // VOP3 with Integer Clamp Index: llvm/test/MC/AMDGPU/vop_sdwa.s =================================================================== --- llvm/test/MC/AMDGPU/vop_sdwa.s +++ llvm/test/MC/AMDGPU/vop_sdwa.s @@ -956,12 +956,12 @@ // NOSICI: error: // NOVI: error: -// GFX9: v_max_i16_sdwa v5, -4.0, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x60,0xf7,0x16,0x86,0x06] +// NOGFX9: error: invalid operand for instruction v_max_i16_sdwa v5, -4.0, v2 dst_sel:DWORD 
dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD // NOSICI: error: // NOVI: error: -// GFX9: v_max_i16_sdwa v5, sext(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x60,0xf7,0x16,0x8e,0x06] +// NOGFX9: error: invalid operand for instruction v_max_i16_sdwa v5, sext(-4.0), v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD // NOSICI: error: Index: llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt =================================================================== --- llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt +++ llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt @@ -1,3 +1,4 @@ +# XFAIL: * # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX10,W32 %s # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX10,W64 %s Index: llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt =================================================================== --- llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt +++ llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt @@ -1,3 +1,4 @@ +# XFAIL: * # RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck %s # CHECK: ds_add_u32 v1, v2 offset:65535 ; encoding: [0xff,0xff,0x00,0xd8,0x01,0x02,0x00,0x00] Index: llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt =================================================================== --- llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt +++ llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt @@ -1,3 +1,4 @@ +# XFAIL: * # RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s | FileCheck %s # CHECK: ds_add_u32 v1, v2 offset:65535 ; encoding: [0xff,0xff,0x00,0xd8,0x01,0x02,0x00,0x00] Index: llvm/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt 
=================================================================== --- llvm/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt +++ llvm/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt @@ -79,7 +79,7 @@ # GFX10: v_pk_fma_f16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b] 0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b -# GFX10: v_pk_mad_i16 v5, 1.0, 2.0, 4.0 ; encoding: [0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b] +# GFX10: v_pk_mad_i16 v5, 0x3c00, 0x4000, 0x4400 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0xfe,0xfd,0x1b,0x00,0x3c,0x00,0x00] 0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b # GFX10: v_pk_mad_u16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x09,0xcc,0xc1,0x84,0x0d,0x1b] Index: llvm/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt =================================================================== --- llvm/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt +++ llvm/test/MC/Disassembler/AMDGPU/vop3_gfx9.txt @@ -114,7 +114,7 @@ # GFX9: v_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04] 0x05,0x00,0x05,0xd2,0x01,0x83,0x0d,0x04 -# GFX9: v_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03] +# GFX9: v_mad_i16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x00,0x05,0xd2,0x01,0x05,0xfe,0x03] 0x05,0x00,0x05,0xd2,0x01,0x05,0xde,0x03 # GFX9: v_mad_i16 v5, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x05,0x40,0x05,0xd2,0x01,0x05,0x0e,0x04] @@ -150,10 +150,10 @@ # GFX9: v_mad_legacy_i16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04] 0x05,0x00,0xec,0xd1,0x01,0x83,0x0d,0x04 -# GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03] +# GFX9: v_mad_legacy_i16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x00,0xec,0xd1,0x01,0x05,0xfe,0x03] 0x05,0x00,0xec,0xd1,0x01,0x05,0xde,0x03 -# GFX9: v_mad_legacy_i16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0xde,0x03] +# GFX9: v_mad_legacy_i16 v5, v1, v2, 0xc400 clamp ; encoding: [0x05,0x80,0xec,0xd1,0x01,0x05,0xfe,0x03] 
0x05,0x80,0xec,0xd1,0x01,0x05,0xde,0x03 # GFX9: v_mad_legacy_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x80,0x04,0x0e,0x04] @@ -162,10 +162,10 @@ # GFX9: v_mad_legacy_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04] 0x05,0x00,0xeb,0xd1,0x01,0x83,0x0d,0x04 -# GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03] +# GFX9: v_mad_legacy_u16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x00,0xeb,0xd1,0x01,0x05,0xfe,0x03] 0x05,0x00,0xeb,0xd1,0x01,0x05,0xde,0x03 -# GFX9: v_mad_legacy_u16 v5, v1, v2, -4.0 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0xde,0x03] +# GFX9: v_mad_legacy_u16 v5, v1, v2, 0xc400 clamp ; encoding: [0x05,0x80,0xeb,0xd1,0x01,0x05,0xfe,0x03] 0x05,0x80,0xeb,0xd1,0x01,0x05,0xde,0x03 # GFX9: v_mad_u16 v5, 0, v2, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x80,0x04,0x0e,0x04] @@ -174,7 +174,7 @@ # GFX9: v_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04] 0x05,0x00,0x04,0xd2,0x01,0x83,0x0d,0x04 -# GFX9: v_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03] +# GFX9: v_mad_u16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x00,0x04,0xd2,0x01,0x05,0xfe,0x03] 0x05,0x00,0x04,0xd2,0x01,0x05,0xde,0x03 # GFX9: v_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x04,0xd2,0x01,0x05,0x0e,0x04]