Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -29,6 +29,7 @@ #include "SIDefines.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" +#include "SIInstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" @@ -344,6 +345,8 @@ bool FlatUsed = false; const SIRegisterInfo *RI = static_cast(STM.getRegisterInfo()); + const SIInstrInfo *TII = + static_cast(STM.getInstrInfo()); for (const MachineBasicBlock &MBB : MF) { for (const MachineInstr &MI : MBB) { @@ -353,8 +356,8 @@ if (MI.isDebugValue()) continue; - // FIXME: This is reporting 0 for many instructions. - CodeSize += MI.getDesc().Size; + if (isVerbose()) + CodeSize += TII->getInstSizeInBytes(MI); unsigned numOperands = MI.getNumOperands(); for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { Index: lib/Target/AMDGPU/CIInstructions.td =================================================================== --- lib/Target/AMDGPU/CIInstructions.td +++ lib/Target/AMDGPU/CIInstructions.td @@ -312,7 +312,7 @@ >; class FlatStoreAtomicPat : Pat < - // atomic store follows aotmic binop convenction so the address comes first + // atomic store follows atomic binop convention so the address comes first. (node i64:$addr, vt:$data), (inst $addr, $data, 1, 0, 0) >; Index: lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -186,6 +186,11 @@ const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); unsigned bytes = Desc.getSize(); + // Instruction sizes that can't just be determined by the opcode have a base + // size of 4, and then the following 4 byte literal. 
+ if (bytes == 0) + bytes = 4; + for (unsigned i = 0; i < bytes; i++) { OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff)); } @@ -266,7 +271,7 @@ const MCRegisterClass &RC = MRI.getRegClass(RCID); uint32_t Enc = getLitEncoding(MO, RC.getSize()); - if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4)) + if (Enc != ~0U && (Enc != 255 || Desc.getSize() != 8)) return Enc; } else if (MO.isImm()) Index: lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- lib/Target/AMDGPU/SIInstrFormats.td +++ lib/Target/AMDGPU/SIInstrFormats.td @@ -83,16 +83,19 @@ field bits<1> DisableDecoder = 0; let isAsmParserOnly = !if(!eq(DisableDecoder{0}, {0}), 0, 1); + + // Similar to Size field. Get encoding size for the format ignoring + // 32-bit literals that could really make the full instruction + // encoding be 8 bytes. + int BaseSize = 4; } class Enc32 { field bits<32> Inst; - int Size = 4; } class Enc64 { field bits<64> Inst; - int Size = 8; } class VOPDstOperand : RegisterOperand ; @@ -121,14 +124,12 @@ VOPAnyCommon { let VOP1 = 1; - let Size = 4; } class VOP2Common pattern> : VOPAnyCommon { let VOP2 = 1; - let Size = 4; } class VOP3Common pattern, bit HasMods = 0, bit VOP3Only = 0> : @@ -149,7 +150,8 @@ !if(!eq(HasMods,1), "cvtVOP3_2_mod", "cvtVOP3_2_nomod")); let isCodeGenOnly = 0; - int Size = 8; + let Size = 8; + let BaseSize = 8; // Because SGPRs may be allowed if there are multiple operands, we // need a post-isel hook to insert copies in order to avoid @@ -321,6 +323,7 @@ let hasSideEffects = 0; let SALU = 1; let SOPP = 1; + let Size = 4; let UseNamedOperandTable = 1; } @@ -332,6 +335,7 @@ let LGKM_CNT = 1; let SMRD = 1; + let mayStore = 0; let mayLoad = 1; let hasSideEffects = 0; @@ -643,6 +647,7 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; + let Size = 4; } } // End Uses = [EXEC] @@ -656,6 +661,8 @@ let LGKM_CNT = 1; let DS = 1; + let Size = 8; + let BaseSize = 8; let UseNamedOperandTable = 1; let Uses = 
[M0, EXEC]; @@ -674,6 +681,8 @@ let VM_CNT = 1; let EXP_CNT = 1; let MUBUF = 1; + let Size = 8; + let BaseSize = 8; let Uses = [EXEC]; let hasSideEffects = 0; @@ -688,6 +697,8 @@ let VM_CNT = 1; let EXP_CNT = 1; let MTBUF = 1; + let Size = 8; + let BaseSize = 8; let Uses = [EXEC]; let hasSideEffects = 0; @@ -703,6 +714,8 @@ // and are not considered done until both have been decremented. let VM_CNT = 1; let LGKM_CNT = 1; + let Size = 8; + let BaseSize = 8; let Uses = [EXEC, FLAT_SCR]; // M0 @@ -718,6 +731,8 @@ let VM_CNT = 1; let EXP_CNT = 1; let MIMG = 1; + let Size = 8; + let BaseSize = 8; let Uses = [EXEC]; let UseNamedOperandTable = 1; Index: lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.h +++ lib/Target/AMDGPU/SIInstrInfo.h @@ -472,6 +472,7 @@ ArrayRef> getSerializableTargetIndices() const override; + unsigned getInstSizeInBytes(const MachineInstr &MI) const; }; namespace AMDGPU { Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2959,3 +2959,52 @@ {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}}; return makeArrayRef(TargetIndices); } + +unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { + unsigned Opc = MI.getOpcode(); + const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc); + unsigned DescSize = Desc.getSize(); + + // If we have a definitive size, we can use it. Otherwise we need to inspect + // the operands to know the size. + if (DescSize == 8 || DescSize == 4) + return DescSize; + + assert(DescSize == 0); + + // 4-byte instructions may have a 32-bit literal encoded after them. Check + // operands that could ever be literals. + if (isVALU(MI) || isSALU(MI)) { + int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); + if (Src0Idx == -1) + return 4; // No operands. 
+ + if (isLiteralConstant(MI.getOperand(Src0Idx), getOpSize(MI, Src0Idx))) + return 8; + + int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); + if (Src1Idx == -1) + return 4; + + if (isLiteralConstant(MI.getOperand(Src1Idx), getOpSize(MI, Src1Idx))) + return 8; + + return 4; + } + + switch (Opc) { + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::DBG_VALUE: + case TargetOpcode::BUNDLE: + case TargetOpcode::EH_LABEL: + return 0; + case TargetOpcode::INLINEASM: { + const MachineFunction *MF = MI.getParent()->getParent(); + const char *AsmStr = MI.getOperand(0).getSymbolName(); + return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + } + default: + llvm_unreachable("unable to find instruction size"); + } +} Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -822,6 +822,7 @@ [] > { let EXP_CNT = 1; + let Size = 8; let Uses = [EXEC]; let SchedRW = [WriteExport]; } @@ -942,7 +943,6 @@ SIMCInstr { let isPseudo = 1; let isCodeGenOnly = 1; - let Size = 4; // Pseudo instructions have no encodings, but adding this field here allows // us to do: @@ -1104,6 +1104,7 @@ let DecoderNamespace = "SICI"; let DisableDecoder = DisableSIDecoder; let isCodeGenOnly = 0; + let Size = 8; } def _vi : SOPK , @@ -1113,6 +1114,7 @@ let DecoderNamespace = "VI"; let DisableDecoder = DisableVIDecoder; let isCodeGenOnly = 0; + let Size = 8; } } //===----------------------------------------------------------------------===// @@ -1134,6 +1136,7 @@ let AssemblerPredicates = [isSICI]; let DecoderNamespace = "SICI"; let DisableDecoder = DisableSIDecoder; + let Size = 4; } class SMRD_SOFF_Real_si op, string opName, dag outs, dag ins, @@ -1144,6 +1147,7 @@ let AssemblerPredicates = [isSICI]; let DecoderNamespace = "SICI"; let DisableDecoder = DisableSIDecoder; + let Size = 4; } @@ -1155,6 +1159,7 @@ let 
AssemblerPredicates = [isVI]; let DecoderNamespace = "VI"; let DisableDecoder = DisableVIDecoder; + let Size = 8; } class SMRD_SOFF_Real_vi op, string opName, dag outs, dag ins, @@ -1165,6 +1170,7 @@ let AssemblerPredicates = [isVI]; let DecoderNamespace = "VI"; let DisableDecoder = DisableVIDecoder; + let Size = 8; } @@ -1254,6 +1260,7 @@ opName#" $sdst, $sbase, $offset", []>, SMRD_IMMe_ci { let AssemblerPredicates = [isCIOnly]; let DecoderNamespace = "CI"; + let Size = 8; } defm _SGPR : SMRD_SOFF_m < @@ -2204,6 +2211,7 @@ let AssemblerPredicates = [isSICI]; let DecoderNamespace = "SICI"; let DisableDecoder = DisableSIDecoder; + let Size = 8; } def _vi : VOP2Common