Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -661,6 +661,49 @@
 // AsmParser
 //===----------------------------------------------------------------------===//
 
+// Holds info related to the current kernel, e.g. count of SGPRs used.
+// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
+// .amdgpu_hsa_kernel or at EOF.
+class KernelScopeInfo {
+  int SgprIndexUnusedMin;
+  int VgprIndexUnusedMin;
+  MCContext *Ctx;
+
+  void usesSgprAt(int i) {
+    if (i >= SgprIndexUnusedMin) {
+      SgprIndexUnusedMin = ++i;
+      if (Ctx) {
+        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
+        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
+      }
+    }
+  }
+  void usesVgprAt(int i) {
+    if (i >= VgprIndexUnusedMin) {
+      VgprIndexUnusedMin = ++i;
+      if (Ctx) {
+        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
+        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
+      }
+    }
+  }
+public:
+  KernelScopeInfo() : SgprIndexUnusedMin(-1), VgprIndexUnusedMin(-1), Ctx(nullptr)
+  {}
+  void initialize(MCContext &Context) {
+    Ctx = &Context;
+    usesSgprAt(SgprIndexUnusedMin = -1);
+    usesVgprAt(VgprIndexUnusedMin = -1);
+  }
+  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
+    switch (RegKind) {
+      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
+      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
+      default: break;
+    }
+  }
+};
+
 class AMDGPUAsmParser : public MCTargetAsmParser {
   const MCInstrInfo &MII;
   MCAsmParser &Parser;
@@ -668,6 +711,7 @@
   unsigned ForcedEncodingSize;
   bool ForcedDPP;
   bool ForcedSDWA;
+  KernelScopeInfo KernelScope;
 
   /// @name Auto-generated Match Functions
   /// {
@@ -693,7 +737,7 @@
   bool ParseSectionDirectiveHSADataGlobalProgram();
   bool ParseSectionDirectiveHSARodataReadonlyAgent();
   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum);
-  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth);
+  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex);
   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn);
 
 public:
@@ -731,6 +775,7 @@
       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
       Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx));
     }
+    KernelScope.initialize(getContext());
   }
 
   bool isSI() const {
@@ -1240,8 +1285,9 @@
   }
 }
 
-bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth)
+bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex)
 {
+  if (DwordRegIndex) { *DwordRegIndex = 0; }
   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
   if (getLexer().is(AsmToken::Identifier)) {
     StringRef RegName = Parser.getTok().getString();
@@ -1301,7 +1347,7 @@
   } else if (getLexer().is(AsmToken::LBrac)) {
     // List of consecutive registers: [s0,s1,s2,s3]
     Parser.Lex();
-    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
+    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
       return false;
     if (RegWidth != 1)
       return false;
@@ -1313,7 +1359,7 @@
       } else if (getLexer().is(AsmToken::RBrac)) {
         Parser.Lex();
         break;
-      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1)) {
+      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
         if (RegWidth1 != 1) {
           return false;
         }
@@ -1341,11 +1387,12 @@
   {
     unsigned Size = 1;
     if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
-      // SGPR and TTMP registers must be are aligned. Max required alignment is 4 dwords.
+      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
       Size = std::min(RegWidth, 4u);
     }
     if (RegNum % Size != 0)
       return false;
+    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
     RegNum = RegNum / Size;
     int RCID = getRegClass(RegKind, RegWidth);
     if (RCID == -1)
@@ -1371,11 +1418,12 @@
   SMLoc StartLoc = Tok.getLoc();
   SMLoc EndLoc = Tok.getEndLoc();
   RegisterKind RegKind;
-  unsigned Reg, RegNum, RegWidth;
+  unsigned Reg, RegNum, RegWidth, DwordRegIndex;
 
-  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
+  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
     return nullptr;
   }
+  KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
 }
 
@@ -1842,6 +1890,7 @@
   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                            ELF::STT_AMDGPU_HSA_KERNEL);
   Lex();
+  KernelScope.initialize(getContext());
   return false;
 }
 
Index: llvm/trunk/test/MC/AMDGPU/sym_kernel_scope.s
===================================================================
--- llvm/trunk/test/MC/AMDGPU/sym_kernel_scope.s
+++ llvm/trunk/test/MC/AMDGPU/sym_kernel_scope.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
+
+.byte .kernel.sgpr_count
+// CHECK: .byte 0
+.byte .kernel.vgpr_count
+// CHECK: .byte 0
+  v_mov_b32_e32 v5, s8
+  s_endpgm
+.byte .kernel.sgpr_count
+// CHECK: .byte 9
+.byte .kernel.vgpr_count
+// CHECK: .byte 6
+
+.amdgpu_hsa_kernel K1
+K1:
+.byte .kernel.sgpr_count
+// CHECK: .byte 0
+.byte .kernel.vgpr_count
+// CHECK: .byte 0
+  v_mov_b32_e32 v1, s86
+  s_endpgm
+.byte .kernel.sgpr_count
+// CHECK: .byte 87
+.byte .kernel.vgpr_count
+// CHECK: .byte 2
+
+.amdgpu_hsa_kernel K2
+.byte .kernel.sgpr_count
+// CHECK: .byte 0
+.byte .kernel.vgpr_count
+// CHECK: .byte 0
+K2:
+  s_load_dwordx8 s[16:23], s[0:1], 0x0
+  v_mov_b32_e32 v0, v0
+  s_endpgm
+.byte .kernel.sgpr_count
+// CHECK: .byte 24
+.byte .kernel.vgpr_count
+// CHECK: .byte 1
+
+.text
+.amdgpu_hsa_kernel K3
+K3:
+A = .kernel.vgpr_count
+  v_mov_b32_e32 v[A], s0
+B = .kernel.vgpr_count
+  v_mov_b32_e32 v[B], s0
+  v_mov_b32_e32 v[B], v[A]
+C = .kernel.vgpr_count
+  v_mov_b32_e32 v[C], v[A]
+D = .kernel.sgpr_count + 3 // align
+E = D + 4
+  s_load_dwordx4 s[D:D+3], s[E:E+1], 0x0
+  s_endpgm
+
+.byte .kernel.sgpr_count
+// CHECK: .byte 10
+.byte .kernel.vgpr_count
+// CHECK: .byte 3
Index: llvm/trunk/test/MC/AMDGPU/sym_option.s
===================================================================
--- llvm/trunk/test/MC/AMDGPU/sym_option.s
+++ llvm/trunk/test/MC/AMDGPU/sym_option.s
@@ -0,0 +1,46 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI %s | FileCheck %s --check-prefix=SI
+// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire %s | FileCheck %s --check-prefix=BONAIRE
+// RUN: llvm-mc -arch=amdgcn -mcpu=hawaii %s | FileCheck %s --check-prefix=HAWAII
+// RUN: llvm-mc -arch=amdgcn -mcpu=kabini %s | FileCheck %s --check-prefix=KABINI
+// RUN: llvm-mc -arch=amdgcn -mcpu=iceland %s | FileCheck %s --check-prefix=ICELAND
+// RUN: llvm-mc -arch=amdgcn -mcpu=carrizo %s | FileCheck %s --check-prefix=CARRIZO
+// RUN: llvm-mc -arch=amdgcn -mcpu=tonga %s | FileCheck %s --check-prefix=TONGA
+// RUN: llvm-mc -arch=amdgcn -mcpu=fiji %s | FileCheck %s --check-prefix=FIJI
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx804 %s | FileCheck %s --check-prefix=GFX804
+// RUN: llvm-mc -arch=amdgcn -mcpu=stoney %s | FileCheck %s --check-prefix=STONEY
+
+.byte .option.machine_version_major
+// SI: .byte 0
+// BONAIRE: .byte 7
+// HAWAII: .byte 7
+// KABINI: .byte 7
+// ICELAND: .byte 8
+// CARRIZO: .byte 8
+// TONGA: .byte 8
+// FIJI: .byte 8
+// GFX804: .byte 8
+// STONEY: .byte 8
+
+.byte .option.machine_version_minor
+// SI: .byte 0
+// BONAIRE: .byte 0
+// HAWAII: .byte 0
+// KABINI: .byte 0
+// ICELAND: .byte 0
+// CARRIZO: .byte 0
+// TONGA: .byte 0
+// FIJI: .byte 0
+// GFX804: .byte 0
+// STONEY: .byte 1
+
+.byte .option.machine_version_stepping
+// SI: .byte 0
+// BONAIRE: .byte 0
+// HAWAII: .byte 1
+// KABINI: .byte 2
+// ICELAND: .byte 0
+// CARRIZO: .byte 1
+// TONGA: .byte 2
+// FIJI: .byte 3
+// GFX804: .byte 4
+// STONEY: .byte 0
Index: llvm/trunk/test/MC/AMDGPU/symbol_special.s
===================================================================
--- llvm/trunk/test/MC/AMDGPU/symbol_special.s
+++ llvm/trunk/test/MC/AMDGPU/symbol_special.s
@@ -1,46 +0,0 @@
-// RUN: llvm-mc -arch=amdgcn -mcpu=SI %s | FileCheck %s --check-prefix=SI
-// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire %s | FileCheck %s --check-prefix=BONAIRE
-// RUN: llvm-mc -arch=amdgcn -mcpu=hawaii %s | FileCheck %s --check-prefix=HAWAII
-// RUN: llvm-mc -arch=amdgcn -mcpu=kabini %s | FileCheck %s --check-prefix=KABINI
-// RUN: llvm-mc -arch=amdgcn -mcpu=iceland %s | FileCheck %s --check-prefix=ICELAND
-// RUN: llvm-mc -arch=amdgcn -mcpu=carrizo %s | FileCheck %s --check-prefix=CARRIZO
-// RUN: llvm-mc -arch=amdgcn -mcpu=tonga %s | FileCheck %s --check-prefix=TONGA
-// RUN: llvm-mc -arch=amdgcn -mcpu=fiji %s | FileCheck %s --check-prefix=FIJI
-// RUN: llvm-mc -arch=amdgcn -mcpu=gfx804 %s | FileCheck %s --check-prefix=GFX804
-// RUN: llvm-mc -arch=amdgcn -mcpu=stoney %s | FileCheck %s --check-prefix=STONEY
-
-.byte .option.machine_version_major
-// SI: .byte 0
-// BONAIRE: .byte 7
-// HAWAII: .byte 7
-// KABINI: .byte 7
-// ICELAND: .byte 8
-// CARRIZO: .byte 8
-// TONGA: .byte 8
-// FIJI: .byte 8
-// GFX804: .byte 8
-// STONEY: .byte 8
-
-.byte .option.machine_version_minor
-// SI: .byte 0
-// BONAIRE: .byte 0
-// HAWAII: .byte 0
-// KABINI: .byte 0
-// ICELAND: .byte 0
-// CARRIZO: .byte 0
-// TONGA: .byte 0
-// FIJI: .byte 0
-// GFX804: .byte 0
-// STONEY: .byte 1
-
-.byte .option.machine_version_stepping
-// SI: .byte 0
-// BONAIRE: .byte 0
-// HAWAII: .byte 1
-// KABINI: .byte 2
-// ICELAND: .byte 0
-// CARRIZO: .byte 1
-// TONGA: .byte 2
-// FIJI: .byte 3
-// GFX804: .byte 4
-// STONEY: .byte 0