Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -301,6 +301,26 @@ MCInstLowering.lower(MI, TmpInst); EmitToStreamer(*OutStreamer, TmpInst); +#ifdef EXPENSIVE_CHECKS + // Sanity-check getInstSizeInBytes on explicitly specified CPUs (it cannot + // work correctly for the generic CPU). + // + // The isPseudo check really shouldn't be here, but unfortunately there are + // some negative lit tests that depend on being able to continue through + // here even when pseudo instructions haven't been lowered. + if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU())) { + SmallVector Fixups; + SmallVector CodeBytes; + raw_svector_ostream CodeStream(CodeBytes); + + std::unique_ptr InstEmitter(createSIMCCodeEmitter( + *STI.getInstrInfo(), *OutContext.getRegisterInfo(), OutContext)); + InstEmitter->encodeInstruction(TmpInst, CodeStream, Fixups, STI); + + assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI)); + } +#endif + if (STI.dumpCode()) { // Disassemble instruction/operands to text. DisasmLines.resize(DisasmLines.size() + 1); Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2398,8 +2398,7 @@ case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: { int32_t Trunc = static_cast(Imm); - return Trunc == Imm && - AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm()); + return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm()); } case AMDGPU::OPERAND_REG_IMM_INT64: case AMDGPU::OPERAND_REG_IMM_FP64: @@ -4975,12 +4974,6 @@ // If we have a definitive size, we can use it. Otherwise we need to inspect // the operands to know the size. - // - // FIXME: Instructions that have a base 32-bit encoding report their size as - // 4, even though they are really 8 bytes if they have a literal operand. - if (DescSize != 0 && DescSize != 4) - return DescSize; - if (isFixedSize(MI)) return DescSize; @@ -4989,23 +4982,27 @@ if (isVALU(MI) || isSALU(MI)) { int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) - return 4; // No operands. + return DescSize; // No operands. if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx])) - return 8; + return DescSize + 4; int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) - return 4; + return DescSize; if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx])) - return 8; + return DescSize + 4; - return 4; - } + int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); + if (Src2Idx == -1) + return DescSize; - if (DescSize == 4) - return 4; + if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx])) + return DescSize + 4; + + return DescSize; + } switch (Opc) { case TargetOpcode::IMPLICIT_DEF: @@ -5021,7 +5018,7 @@ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); } default: - llvm_unreachable("unable to find instruction size"); + return DescSize; } } Index: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td @@ -716,12 +716,6 @@ let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { -multiclass VOP32_Real_vi op> { - def _vi : - VOP2_Real(NAME), SIEncodingFamily.VI>, - VOP3e_vi(NAME).Pfl>; -} - multiclass VOP2_Real_MADK_vi op> { def _vi : VOP2_Real(NAME), SIEncodingFamily.VI>, VOP2_MADKe(NAME).Pfl>; @@ -899,9 +893,6 @@ defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>; defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>; -defm V_READLANE_B32 : VOP32_Real_vi <0x289>; -defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>; - defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>; defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>; Index: llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP3Instructions.td @@ -651,23 +651,23 @@ let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { multiclass VOP3_Real_vi op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3e_vi (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3e_vi (NAME).Pfl>; } multiclass VOP3be_Real_vi op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3be_vi (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3be_vi (NAME).Pfl>; } multiclass VOP3OpSel_Real_gfx9 op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3OpSel_gfx9 (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3OpSel_gfx9 (NAME).Pfl>; } multiclass VOP3Interp_Real_vi op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3Interp_vi (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3Interp_vi (NAME).Pfl>; } } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI" @@ -813,6 +813,9 @@ defm V_MUL_HI_U32 : VOP3_Real_vi <0x286>; defm V_MUL_HI_I32 : VOP3_Real_vi <0x287>; +defm V_READLANE_B32 : VOP3_Real_vi <0x289>; +defm V_WRITELANE_B32 : VOP3_Real_vi <0x28a>; + defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>; defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>; defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>; Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0