Index: include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- include/llvm/CodeGen/TargetInstrInfo.h +++ include/llvm/CodeGen/TargetInstrInfo.h @@ -1260,8 +1260,9 @@ /// Measure the specified inline asm to determine an approximation of its /// length. - virtual unsigned getInlineAsmLength(const char *Str, - const MCAsmInfo &MAI) const; + virtual unsigned getInlineAsmLength( + const char *Str, const MCAsmInfo &MAI, + const TargetSubtargetInfo *STI = nullptr) const; /// Allocate and return a hazard recognizer to use for this target when /// scheduling the machine instructions before register allocation. Index: include/llvm/MC/MCAsmInfo.h =================================================================== --- include/llvm/MC/MCAsmInfo.h +++ include/llvm/MC/MCAsmInfo.h @@ -27,6 +27,7 @@ class MCExpr; class MCSection; class MCStreamer; +class MCSubtargetInfo; class MCSymbol; namespace WinEH { @@ -473,7 +474,13 @@ bool hasMachoTBSSDirective() const { return HasMachoTBSSDirective; } bool hasCOFFAssociativeComdats() const { return HasCOFFAssociativeComdats; } bool hasCOFFComdatConstants() const { return HasCOFFComdatConstants; } - unsigned getMaxInstLength() const { return MaxInstLength; } + + /// Returns the maximum possible encoded instruction size in bytes. If \p STI + /// is null, this should be the maximum size for any subtarget. + virtual unsigned getMaxInstLength(const MCSubtargetInfo *STI = nullptr) const { + return MaxInstLength; + } + unsigned getMinInstAlignment() const { return MinInstAlignment; } bool getDollarIsPC() const { return DollarIsPC; } const char *getSeparatorString() const { return SeparatorString; } Index: lib/CodeGen/TargetInstrInfo.cpp =================================================================== --- lib/CodeGen/TargetInstrInfo.cpp +++ lib/CodeGen/TargetInstrInfo.cpp @@ -85,11 +85,13 @@ /// simple--i.e. not a logical or arithmetic expression--size values without /// the optional fill value. This is primarily used for creating arbitrary /// sized inline asm blocks for testing purposes. -unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, - const MCAsmInfo &MAI) const { +unsigned TargetInstrInfo::getInlineAsmLength( + const char *Str, + const MCAsmInfo &MAI, const TargetSubtargetInfo *STI) const { // Count the number of instructions in the asm. bool AtInsnStart = true; unsigned Length = 0; + const unsigned MaxInstLength = MAI.getMaxInstLength(STI); for (; *Str; ++Str) { if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), strlen(MAI.getSeparatorString())) == 0) { @@ -101,7 +103,7 @@ } if (AtInsnStart && !std::isspace(static_cast(*Str))) { - unsigned AddLength = MAI.getMaxInstLength(); + unsigned AddLength = MaxInstLength; if (strncmp(Str, ".space", 6) == 0) { char *EStr; int SpaceSize; Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h @@ -27,6 +27,7 @@ public: explicit AMDGPUMCAsmInfo(const Triple &TT); bool shouldOmitSectionDirective(StringRef SectionName) const override; + unsigned getMaxInstLength(const MCSubtargetInfo *STI) const override; }; } // namespace llvm #endif Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -9,6 +9,8 @@ #include "AMDGPUMCAsmInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" using namespace llvm; @@ -18,7 +20,10 @@ HasSingleParameterDotFile = false; //===------------------------------------------------------------------===// MinInstAlignment = 4; - MaxInstLength = (TT.getArch() == Triple::amdgcn) ? 8 : 16; + + // This is the maximum instruction encoded size for gfx10. With a known + // subtarget, it can be reduced to 8 bytes. + MaxInstLength = (TT.getArch() == Triple::amdgcn) ? 12 : 16; SeparatorString = "\n"; CommentString = ";"; PrivateLabelPrefix = ""; @@ -44,3 +49,12 @@ SectionName == ".hsarodata_readonly_agent" || MCAsmInfo::shouldOmitSectionDirective(SectionName); } + +unsigned AMDGPUMCAsmInfo::getMaxInstLength(const MCSubtargetInfo *STI) const { + if (!STI || STI->getTargetTriple().getArch() == Triple::r600) + return MaxInstLength; + + // FIXME: This is incorrect for r600, but it doesn't support inlineasm so it + // doesn't really matter. + return STI->getFeatureBits()[AMDGPU::FeatureGFX10] ? 12 : 8; +} Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5574,7 +5574,8 @@ case TargetOpcode::INLINEASM_BR: { const MachineFunction *MF = MI.getParent()->getParent(); const char *AsmStr = MI.getOperand(0).getSymbolName(); - return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(), + &MF->getSubtarget()); } default: return DescSize; Index: lib/Target/Hexagon/HexagonInstrInfo.h =================================================================== --- lib/Target/Hexagon/HexagonInstrInfo.h +++ lib/Target/Hexagon/HexagonInstrInfo.h @@ -264,8 +264,10 @@ /// Measure the specified inline asm to determine an approximation of its /// length. - unsigned getInlineAsmLength(const char *Str, - const MCAsmInfo &MAI) const override; + unsigned getInlineAsmLength( + const char *Str, + const MCAsmInfo &MAI, + const TargetSubtargetInfo *STI = nullptr) const override; /// Allocate and return a hazard recognizer to use for this target when /// scheduling the machine instructions after register allocation. Index: lib/Target/Hexagon/HexagonInstrInfo.cpp =================================================================== --- lib/Target/Hexagon/HexagonInstrInfo.cpp +++ lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1712,17 +1712,19 @@ /// Hexagon counts the number of ##'s and adjust for that many /// constant exenders. unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str, - const MCAsmInfo &MAI) const { + const MCAsmInfo &MAI, + const TargetSubtargetInfo *STI) const { StringRef AStr(Str); // Count the number of instructions in the asm. bool atInsnStart = true; unsigned Length = 0; + const unsigned MaxInstLength = MAI.getMaxInstLength(STI); for (; *Str; ++Str) { if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), strlen(MAI.getSeparatorString())) == 0) atInsnStart = true; if (atInsnStart && !std::isspace(static_cast(*Str))) { - Length += MAI.getMaxInstLength(); + Length += MaxInstLength; atInsnStart = false; } if (atInsnStart && strncmp(Str, MAI.getCommentString().data(), Index: test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx10.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx10.ll @@ -0,0 +1,34 @@ +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s + +; Make sure the code size estimate for inline asm is 12-bytes per +; instruction, rather than 8 in previous generations. + +; GCN-LABEL: {{^}}long_forward_branch_gfx10only: +; GFX9: s_cmp_eq_u32 +; GFX9-NEXT: s_cbranch_scc1 + +; GFX10: s_cmp_eq_u32 +; GFX10-NEXT: s_cbranch_scc0 +; GFX10: s_getpc_b64 +; GFX10: s_add_u32 +; GFX10: s_addc_u32 +; GFX10: s_setpc_b64 +define amdgpu_kernel void @long_forward_branch_gfx10only(i32 addrspace(1)* %arg, i32 %cnd) #0 { +bb0: + %cmp = icmp eq i32 %cnd, 0 + br i1 %cmp, label %bb3, label %bb2 ; +9 dword branch + +bb2: + ; Estimated as 48-bytes on gfx10 (requiring a long branch), but + ; 32-bytes on gfx9 (allowing a short branch) + call void asm sideeffect + "v_nop_e64 + v_nop_e64 + v_nop_e64", ""() #0 + br label %bb3 + +bb3: + store volatile i32 %cnd, i32 addrspace(1)* %arg + ret void +}