Index: lib/Target/AMDGPU/AMDGPUMCInstLower.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -31,10 +31,13 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include +#define DEBUG_TYPE "amdgpu-mcinstlower" + using namespace llvm; namespace { @@ -239,6 +242,19 @@ return AsmPrinter::lowerConstant(CV); } +static void checkInstSize(const MachineInstr &MI, const MCInst &MCI, + const GCNSubtarget &STI, MCContext &OutContext) { + SmallVector Fixups; + SmallVector CodeBytes; + raw_svector_ostream CodeStream(CodeBytes); + + std::unique_ptr InstEmitter(createSIMCCodeEmitter( + *STI.getInstrInfo(), *OutContext.getRegisterInfo(), OutContext)); + InstEmitter->encodeInstruction(MCI, CodeStream, Fixups, STI); + + assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(MI)); +} + void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (emitPseudoExpansionLowering(*OutStreamer, MI)) return; @@ -301,6 +317,8 @@ MCInstLowering.lower(MI, TmpInst); EmitToStreamer(*OutStreamer, TmpInst); + LLVM_DEBUG(checkInstSize(*MI, TmpInst, STI, OutContext)); + if (STI.dumpCode()) { // Disassemble instruction/operands to text. DisasmLines.resize(DisasmLines.size() + 1); Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2398,8 +2398,7 @@ case AMDGPU::OPERAND_REG_INLINE_C_INT32: case AMDGPU::OPERAND_REG_INLINE_C_FP32: { int32_t Trunc = static_cast(Imm); - return Trunc == Imm && - AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm()); + return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm()); } case AMDGPU::OPERAND_REG_IMM_INT64: case AMDGPU::OPERAND_REG_IMM_FP64: @@ -4777,12 +4776,6 @@ // If we have a definitive size, we can use it. Otherwise we need to inspect // the operands to know the size. - // - // FIXME: Instructions that have a base 32-bit encoding report their size as - // 4, even though they are really 8 bytes if they have a literal operand. - if (DescSize != 0 && DescSize != 4) - return DescSize; - if (isFixedSize(MI)) return DescSize; @@ -4791,23 +4784,27 @@ if (isVALU(MI) || isSALU(MI)) { int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); if (Src0Idx == -1) - return 4; // No operands. + return DescSize; // No operands. if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx])) - return 8; + return DescSize + 4; int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) - return 4; + return DescSize; if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx])) - return 8; + return DescSize + 4; - return 4; - } + int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); + if (Src2Idx == -1) + return DescSize; - if (DescSize == 4) - return 4; + if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx])) + return DescSize + 4; + + return DescSize; + } switch (Opc) { case TargetOpcode::IMPLICIT_DEF: @@ -4823,7 +4820,7 @@ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); } default: - llvm_unreachable("unable to find instruction size"); + return DescSize; } } Index: lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- lib/Target/AMDGPU/VOP2Instructions.td +++ lib/Target/AMDGPU/VOP2Instructions.td @@ -716,12 +716,6 @@ let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { -multiclass VOP32_Real_vi op> { - def _vi : - VOP2_Real(NAME), SIEncodingFamily.VI>, - VOP3e_vi(NAME).Pfl>; -} - multiclass VOP2_Real_MADK_vi op> { def _vi : VOP2_Real(NAME), SIEncodingFamily.VI>, VOP2_MADKe(NAME).Pfl>; @@ -899,9 +893,6 @@ defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>; defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>; -defm V_READLANE_B32 : VOP32_Real_vi <0x289>; -defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>; - defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>; defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>; Index: lib/Target/AMDGPU/VOP3Instructions.td =================================================================== --- lib/Target/AMDGPU/VOP3Instructions.td +++ lib/Target/AMDGPU/VOP3Instructions.td @@ -662,23 +662,23 @@ let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { multiclass VOP3_Real_vi op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3e_vi (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3e_vi (NAME).Pfl>; } multiclass VOP3be_Real_vi op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3be_vi (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3be_vi (NAME).Pfl>; } multiclass VOP3OpSel_Real_gfx9 op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3OpSel_gfx9 (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3OpSel_gfx9 (NAME).Pfl>; } multiclass VOP3Interp_Real_vi op> { - def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, - VOP3Interp_vi (NAME).Pfl>; + def _vi : VOP3_Real(NAME), SIEncodingFamily.VI>, + VOP3Interp_vi (NAME).Pfl>; } } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI" @@ -824,6 +824,9 @@ defm V_MUL_HI_U32 : VOP3_Real_vi <0x286>; defm V_MUL_HI_I32 : VOP3_Real_vi <0x287>; +defm V_READLANE_B32 : VOP3_Real_vi <0x289>; +defm V_WRITELANE_B32 : VOP3_Real_vi <0x28a>; + defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>; defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>; defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>; Index: test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0