Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.h =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -40,6 +40,8 @@ NumVGPR(0), NumSGPR(0), FlatUsed(false), + NumSGPRsForNumActiveWavesPerEU(0), + NumVGPRsForNumActiveWavesPerEU(0), ReservedVGPRFirst(0), ReservedVGPRCount(0), DebuggerWavefrontPrivateSegmentOffsetSGPR((uint16_t)-1), @@ -71,6 +73,14 @@ uint32_t LDSSize; bool FlatUsed; + // Number of SGPRs that meets number of active waves per execution unit + // request. + uint32_t NumSGPRsForNumActiveWavesPerEU; + + // Number of VGPRs that meets number of active waves per execution unit + // request. + uint32_t NumVGPRsForNumActiveWavesPerEU; + // If ReservedVGPRCount is 0 then must be 0. Otherwise, this is the first // fixed VGPR number reserved. uint16_t ReservedVGPRFirst; Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -202,6 +202,16 @@ OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) + " bytes/workgroup (compile time only)", false); + OutStreamer->emitRawComment(" SGPRBlocks: " + + Twine(KernelInfo.SGPRBlocks), false); + OutStreamer->emitRawComment(" VGPRBlocks: " + + Twine(KernelInfo.VGPRBlocks), false); + + OutStreamer->emitRawComment(" NumSGPRsForNumActiveWavesPerEU: " + + Twine(KernelInfo.NumSGPRsForNumActiveWavesPerEU), false); + OutStreamer->emitRawComment(" NumVGPRsForNumActiveWavesPerEU: " + + Twine(KernelInfo.NumVGPRsForNumActiveWavesPerEU), false); + OutStreamer->emitRawComment(" ReservedVGPRFirst: " + Twine(KernelInfo.ReservedVGPRFirst), false); OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount), @@ -446,16 +456,11 @@ ExtraSGPRs = 6; } - MaxSGPR += ExtraSGPRs; - // Record first reserved register and reserved register count fields, and // update 
max register counts if "amdgpu-debugger-reserve-regs" attribute was - // specified. - if (STM.debuggerReserveRegs()) { - ProgInfo.ReservedVGPRFirst = MaxVGPR + 1; - ProgInfo.ReservedVGPRCount = MFI->getDebuggerReservedVGPRCount(); - MaxVGPR += MFI->getDebuggerReservedVGPRCount(); - } + // requested. + ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0; + ProgInfo.ReservedVGPRCount = RI->getDebuggerReservedNumVGPRs(STM); // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue" @@ -467,11 +472,24 @@ RI->getHWRegIndex(MFI->getScratchRSrcReg()); } + // Account for extra SGPRs and VGPRs reserved for debugger usage. + MaxSGPR += ExtraSGPRs; + MaxVGPR += RI->getDebuggerReservedNumVGPRs(STM); + // We found the maximum register index. They start at 0, so add one to get the // number of registers. ProgInfo.NumVGPR = MaxVGPR + 1; ProgInfo.NumSGPR = MaxSGPR + 1; + // Adjust number of registers used to meet default/requested minimum/maximum + // number of active waves per execution unit request. + ProgInfo.NumSGPRsForNumActiveWavesPerEU = std::max( + ProgInfo.NumSGPR, + RI->getMinNumSGPRs(STM, MFI->getMaxNumActiveWavesPerEU())); + ProgInfo.NumVGPRsForNumActiveWavesPerEU = std::max( + ProgInfo.NumVGPR, + RI->getMinNumVGPRs(MFI->getMaxNumActiveWavesPerEU())); + if (STM.hasSGPRInitBug()) { if (ProgInfo.NumSGPR > SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) { LLVMContext &Ctx = MF.getFunction()->getContext(); @@ -482,6 +500,8 @@ } ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; + ProgInfo.NumSGPRsForNumActiveWavesPerEU = + SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; } if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) { @@ -498,8 +518,16 @@ Ctx.diagnose(Diag); } - ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4; - ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8; + // SGPRBlocks is actual number of SGPR blocks minus 1. 
+ ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForNumActiveWavesPerEU, + RI->getSGPRAllocGranule()); + ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1; + + // VGPRBlocks is actual number of VGPR blocks minus 1. + ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForNumActiveWavesPerEU, + RI->getVGPRAllocGranule()); + ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1; + // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode // register. ProgInfo.FloatMode = getFPMode(MF); @@ -525,8 +553,8 @@ LDSAlignShift = 9; } - unsigned LDSSpillSize = MFI->LDSWaveSpillSize * - MFI->getMaximumWorkGroupSize(MF); + unsigned LDSSpillSize = + MFI->LDSWaveSpillSize * MFI->getMaxFlatWorkGroupSize(); ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize; ProgInfo.LDSBlocks = Index: lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -184,13 +184,12 @@ // TODO: Have some sort of hint or other heuristics to guess occupancy based // on other factors.. - unsigned OccupancyHint - = AMDGPU::getIntegerAttribute(F, "amdgpu-max-waves-per-eu", 0); + unsigned OccupancyHint = ST.getNumActiveWavesPerEU(F).second; if (OccupancyHint == 0) OccupancyHint = 7; // Clamp to max value. - OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerCU()); + OccupancyHint = std::min(OccupancyHint, ST.getMaxNumActiveWavesPerEU()); // Check the hint but ignore it if it's obviously wrong from the existing LDS // usage. @@ -650,9 +649,11 @@ if (AMDGPU::isShader(ContainingFunction.getCallingConv())) return; + const AMDGPUSubtarget &ST = + TM->getSubtarget(ContainingFunction); // FIXME: We should also try to get this value from the reqd_work_group_size // function attribute if it is available. 
- unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction); + unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second; const DataLayout &DL = Mod->getDataLayout(); Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -263,14 +263,6 @@ return EnableXNACK; } - unsigned getMaxWavesPerCU() const { - if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) - return 10; - - // FIXME: Not sure what this is for other subtagets. - return 8; - } - /// \brief Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. unsigned getExplicitKernelArgOffset() const { @@ -289,6 +281,106 @@ bool enableSubRegLiveness() const override { return true; } + + /// \returns Number of execution units per compute unit supported by the + /// subtarget. + unsigned getNumEUsPerCU() const { + return 4; + } + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given flat work group size. + unsigned getMaxNumWorkGroupsPerCU(unsigned FlatWorkGroupSize) const { + if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) + return 8; + return getNumWavesPerWorkGroup(FlatWorkGroupSize) == 1 ? 40 : 16; + } + + /// \returns Maximum number of active waves per compute unit supported by the + /// subtarget without any kind of limitation. + unsigned getMaxNumActiveWavesPerCU() const { + return getMaxNumActiveWavesPerEU() * getNumEUsPerCU(); + } + + /// \returns Maximum number of active waves per compute unit supported by the + /// subtarget and limited by given flat work group size. 
+ unsigned getMaxNumActiveWavesPerCU(unsigned FlatWorkGroupSize) const { + unsigned NumWavesPerWorkGroup = + getNumWavesPerWorkGroup(FlatWorkGroupSize); + unsigned MaxNumWorkGroupsPerCU = + getMaxNumWorkGroupsPerCU(FlatWorkGroupSize); + unsigned MaxNumActiveWavesPerCU = + NumWavesPerWorkGroup * MaxNumWorkGroupsPerCU; + MaxNumActiveWavesPerCU = + std::min(MaxNumActiveWavesPerCU, getMaxNumActiveWavesPerCU()); + MaxNumActiveWavesPerCU = + alignDown(MaxNumActiveWavesPerCU, NumWavesPerWorkGroup); + MaxNumActiveWavesPerCU = MaxNumActiveWavesPerCU / NumWavesPerWorkGroup; + MaxNumActiveWavesPerCU = MaxNumActiveWavesPerCU * NumWavesPerWorkGroup; + return MaxNumActiveWavesPerCU; + } + + /// \returns Minimum number of active waves per execution unit supported by + /// the subtarget. + unsigned getMinNumActiveWavesPerEU() const { + return 1; + } + + /// \returns Maximum number of active waves per execution unit supported by + /// the subtarget without any kind of limitation. + unsigned getMaxNumActiveWavesPerEU() const { + // FIXME: Not sure what this is for subtargets below Southern Islands. + if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) + return 8; + // FIXME: Need to take scratch memory into account. + return 10; + } + + /// \returns Maximum number of active waves per execution unit supported by + /// the subtarget and limited by given flat work group size. + unsigned getMaxNumActiveWavesPerEU(unsigned FlatWorkGroupSize) const { + unsigned MaxNumActiveWavesPerCU = + getMaxNumActiveWavesPerCU(FlatWorkGroupSize); + unsigned MaxNumActiveWavesPerEU = + alignDown(MaxNumActiveWavesPerCU, getNumEUsPerCU()); + MaxNumActiveWavesPerEU = MaxNumActiveWavesPerEU / getNumEUsPerCU(); + return MaxNumActiveWavesPerEU; + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const { + return 1; + } + + /// \returns Maximum flat work group size supported by the subtarget. 
+ unsigned getMaxFlatWorkGroupSize() const { + return 2048; + } + + /// \returns Number of waves per work group given the flat work group size. + unsigned getNumWavesPerWorkGroup(unsigned FlatWorkGroupSize) const { + return alignTo(FlatWorkGroupSize, getWavefrontSize()) / getWavefrontSize(); + } + + /// \returns Subtarget's default pair of minimum/maximum flat work group sizes + /// for function \p F, or minimum/maximum flat work group sizes explicitly + /// requested using "amdgpu-flat-work-group-size" attribute attached to + /// function \p F. + /// + /// \returns Subtarget's default values if explicitly requested values cannot + /// be converted to integer, or violate subtarget's specifications. + std::pair getFlatWorkGroupSizes(const Function &F) const; + + /// \returns Subtarget's default pair of minimum/maximum number of active + /// waves per execution unit for function \p F, or minimum/maximum number of + /// active waves per execution unit explicitly requested using + /// "amdgpu-num-active-waves-per-eu" attribute attached to function \p F. + /// + /// \returns Subtarget's default values if explicitly requested values cannot + /// be converted to integer, violate subtarget's specifications, or are not + /// compatible with minimum/maximum number of active waves limited by flat + /// work group size, register usage, and/or lds usage. + std::pair getNumActiveWavesPerEU(const Function &F) const; }; class R600Subtarget final : public AMDGPUSubtarget { Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -178,6 +178,82 @@ return 1; } +std::pair AMDGPUSubtarget::getFlatWorkGroupSizes( + const Function &F) const { + + // Default minimum/maximum flat work group sizes. + std::pair Default = + AMDGPU::isCompute(F.getCallingConv()) ? 
+ std::pair(128, 256) : + std::pair(1, getWavefrontSize()); + + // Requested minimum/maximum flat work group sizes. + std::pair Requested = + AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size", Default); + + // Make sure requested minimum is less than requested maximum. + if (Requested.first > Requested.second) + return Default; + + // Make sure requested values do not violate subtarget's specifications. + if (Requested.first < getMinFlatWorkGroupSize()) + return Default; + if (Requested.second > getMaxFlatWorkGroupSize()) + return Default; + + return Requested; +} + +std::pair AMDGPUSubtarget::getNumActiveWavesPerEU( + const Function &F) const { + + // Default minimum/maximum number of active waves per execution unit. + std::pair Default(1, 0); + + // Default/requested minimum/maximum flat work group sizes. + std::pair FlatWorkGroupSizes = getFlatWorkGroupSizes(F); + + // If minimum/maximum flat work group sizes were explicitly requested using + // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum + // number of active waves per execution unit to values implied by requested + // minimum/maximum flat work group sizes. + unsigned ImpliedByMinFlatWorkGroupSize = + getMaxNumActiveWavesPerEU(FlatWorkGroupSizes.first); + unsigned ImpliedByMaxFlatWorkGroupSize = + getMaxNumActiveWavesPerEU(FlatWorkGroupSizes.second); + unsigned MinImpliedByFlatWorkGroupSize = + std::min(ImpliedByMinFlatWorkGroupSize, ImpliedByMaxFlatWorkGroupSize); + unsigned MaxImpliedByFlatWorkGroupSize = + std::max(ImpliedByMinFlatWorkGroupSize, ImpliedByMaxFlatWorkGroupSize); + if (F.hasFnAttribute("amdgpu-flat-work-group-size")) { + Default.first = MinImpliedByFlatWorkGroupSize; + Default.second = MaxImpliedByFlatWorkGroupSize; + } + + // Requested minimum/maximum number of active waves per execution unit. 
+ std::pair Requested = AMDGPU::getIntegerPairAttribute( + F, "amdgpu-num-active-waves-per-eu", Default, true); + + // Make sure requested minimum is less than requested maximum. + if (Requested.second && Requested.first > Requested.second) + return Default; + + // Make sure requested values do not violate subtarget's specifications. + if (Requested.first < getMinNumActiveWavesPerEU() || + Requested.first > getMaxNumActiveWavesPerEU()) + return Default; + if (Requested.second > getMaxNumActiveWavesPerEU()) + return Default; + + // Make sure requested values are compatible with values implied by requested + // minimum/maximum flat work group sizes. + if (Requested.first > MinImpliedByFlatWorkGroupSize || + Requested.second > MaxImpliedByFlatWorkGroupSize) + return Default; + + return Requested; +} + R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM) : AMDGPUSubtarget(TT, GPU, FS, TM), Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -729,7 +729,7 @@ const SISubtarget &ST = MF->getSubtarget(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); DebugLoc DL = MBB.findDebugLoc(MI); - unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF); + unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize(); unsigned WavefrontSize = ST.getWavefrontSize(); unsigned TIDReg = MFI->getTIDReg(); Index: lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -60,10 +60,14 @@ unsigned PSInputAddr; bool ReturnsVoid; - unsigned MaximumWorkGroupSize; + // A pair of default/requested minimum/maximum flat work group sizes. + // Minimum - first, maximum - second. 
+ std::pair FlatWorkGroupSizes; + + // A pair of default/requested minimum/maximum number of active waves per + // execution unit. Minimum - first, maximum - second. + std::pair NumActiveWavesPerEU; - // Number of reserved VGPRs for debugger usage. - unsigned DebuggerReservedVGPRCount; // Stack object indices for work group IDs. std::array DebuggerWorkGroupIDStackObjectIndices; // Stack object indices for work item IDs. @@ -352,9 +356,38 @@ ReturnsVoid = Value; } - /// \returns Number of reserved VGPRs for debugger usage. - unsigned getDebuggerReservedVGPRCount() const { - return DebuggerReservedVGPRCount; + /// \returns A pair of default/requested minimum/maximum flat work group sizes + /// for this function. + std::pair getFlatWorkGroupSizes() const { + return FlatWorkGroupSizes; + } + + /// \returns Default/requested minimum flat work group size for this function. + unsigned getMinFlatWorkGroupSize() const { + return FlatWorkGroupSizes.first; + } + + /// \returns Default/requested maximum flat work group size for this function. + unsigned getMaxFlatWorkGroupSize() const { + return FlatWorkGroupSizes.second; + } + + /// \returns A pair of default/requested minimum/maximum number of active + /// waves per execution unit. + std::pair getNumActiveWavesPerEU() const { + return NumActiveWavesPerEU; + } + + /// \returns Default/requested minimum number of active waves per execution + /// unit. + unsigned getMinNumActiveWavesPerEU() const { + return NumActiveWavesPerEU.first; + } + + /// \returns Default/requested maximum number of active waves per execution + /// unit. + unsigned getMaxNumActiveWavesPerEU() const { + return NumActiveWavesPerEU.second; } /// \returns Stack object index for \p Dim's work group ID. 
@@ -412,8 +445,6 @@ } llvm_unreachable("unexpected dimension"); } - - unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const; }; } // End namespace llvm Index: lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -48,8 +48,8 @@ PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), PSInputAddr(0), ReturnsVoid(true), - MaximumWorkGroupSize(0), - DebuggerReservedVGPRCount(0), + FlatWorkGroupSizes(0, 0), + NumActiveWavesPerEU(0, 0), DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}), DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}), LDSWaveSpillSize(0), @@ -136,13 +136,8 @@ ST.isAmdHsaOS()) FlatScratchInit = true; - if (AMDGPU::isCompute(F->getCallingConv())) - MaximumWorkGroupSize = AMDGPU::getMaximumWorkGroupSize(*F); - else - MaximumWorkGroupSize = ST.getWavefrontSize(); - - if (ST.debuggerReserveRegs()) - DebuggerReservedVGPRCount = 4; + FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F); + NumActiveWavesPerEU = ST.getNumActiveWavesPerEU(*F); } unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( @@ -230,8 +225,3 @@ Spill.VGPR = LaneVGPRs[LaneVGPRIdx]; return Spill; } - -unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize( - const MachineFunction &MF) const { - return MaximumWorkGroupSize; -} Index: lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.h +++ lib/Target/AMDGPU/SIRegisterInfo.h @@ -176,14 +176,6 @@ unsigned getPreloadedValue(const MachineFunction &MF, enum PreloadedValue Value) const; - /// \brief Give the maximum number of VGPRs that can be used by \p WaveCount - /// concurrent waves. - unsigned getNumVGPRsAllowed(unsigned WaveCount) const; - - /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount - /// concurrent waves. 
- unsigned getNumSGPRsAllowed(const SISubtarget &ST, unsigned WaveCount) const; - unsigned findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF) const; @@ -193,6 +185,74 @@ bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const; + /// \returns SGPR allocation granularity supported by the subtarget. + unsigned getSGPRAllocGranule() const { + return 8; + } + + /// \returns Total number of SGPRs supported by the subtarget. + unsigned getTotalNumSGPRs(const SISubtarget &ST) const; + + /// \returns Addressable number of SGPRs supported by the subtarget. + unsigned getAddressableNumSGPRs(const SISubtarget &ST) const; + + /// \returns Reserved number of SGPRs supported by the subtarget. + unsigned getReservedNumSGPRs(const SISubtarget &ST) const; + + /// \returns Minimum number of SGPRs that meets given number of active waves + /// per execution unit requirement for given subtarget. + unsigned getMinNumSGPRs(const SISubtarget &ST, + unsigned NumActiveWavesPerEU) const; + + /// \returns Maximum number of SGPRs that meets given number of active waves + /// per execution unit requirement for given subtarget. + unsigned getMaxNumSGPRs(const SISubtarget &ST, + unsigned NumActiveWavesPerEU) const; + + /// \returns Maximum number of SGPRs that meets number of active waves per + /// execution unit requirement for function \p MF, or number of SGPRs + /// explicitly requested using "amdgpu-num-sgpr" attribute attached to + /// function \p MF. + /// + /// \returns Value that meets number of active waves per execution unit + /// requirement if explicitly requested value cannot be converted to integer, + /// violates subtarget's specifications, or does not meet number of active + /// waves per execution unit requirement. + unsigned getMaxNumSGPRs(const MachineFunction &MF) const; + + /// \returns VGPR allocation granularity supported by the subtarget. 
+ unsigned getVGPRAllocGranule() const { + return 4; + } + + /// \returns Total number of VGPRs supported by the subtarget. + unsigned getTotalNumVGPRs() const { + return 256; + } + + /// \returns Reserved number of VGPRs for debugger use supported by the + /// subtarget. + unsigned getDebuggerReservedNumVGPRs(const SISubtarget &ST) const; + + /// \returns Minimum number of SGPRs that meets given number of active waves + /// per execution unit requirement. + unsigned getMinNumVGPRs(unsigned NumActiveWavesPerEU) const; + + /// \returns Maximum number of VGPRs that meets given number of active waves + /// per execution unit requirement. + unsigned getMaxNumVGPRs(unsigned NumActiveWavesPerEU) const; + + /// \returns Maximum number of VGPRs that meets number of active waves per + /// execution unit requirement for function \p MF, or number of VGPRs + /// explicitly requested using "amdgpu-num-vgpr" attribute attached to + /// function \p MF. + /// + /// \returns Value that meets number of active waves per execution unit + /// requirement if explicitly requested value cannot be converted to integer, + /// violates subtarget's specifications, or does not meet number of active + /// waves per execution unit requirement. 
+ unsigned getMaxNumVGPRs(const MachineFunction &MF) const; + private: void buildScratchLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, const MachineOperand *SrcDst, Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -24,53 +24,6 @@ using namespace llvm; -static unsigned getMaxWaveCountPerSIMD(const MachineFunction &MF) { - const SIMachineFunctionInfo &MFI = *MF.getInfo(); - const SISubtarget &ST = MF.getSubtarget(); - unsigned SIMDPerCU = 4; - - unsigned MaxInvocationsPerWave = SIMDPerCU * ST.getWavefrontSize(); - return alignTo(MFI.getMaximumWorkGroupSize(MF), MaxInvocationsPerWave) / - MaxInvocationsPerWave; -} - -static unsigned getMaxWorkGroupSGPRCount(const MachineFunction &MF) { - const SISubtarget &ST = MF.getSubtarget(); - unsigned MaxWaveCountPerSIMD = getMaxWaveCountPerSIMD(MF); - - unsigned TotalSGPRCountPerSIMD, AddressableSGPRCount, SGPRUsageAlignment; - unsigned ReservedSGPRCount; - - if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { - TotalSGPRCountPerSIMD = 800; - AddressableSGPRCount = 102; - SGPRUsageAlignment = 16; - ReservedSGPRCount = 6; // VCC, FLAT_SCRATCH, XNACK - } else { - TotalSGPRCountPerSIMD = 512; - AddressableSGPRCount = 104; - SGPRUsageAlignment = 8; - ReservedSGPRCount = 2; // VCC - } - - unsigned MaxSGPRCount = (TotalSGPRCountPerSIMD / MaxWaveCountPerSIMD); - MaxSGPRCount = alignDown(MaxSGPRCount, SGPRUsageAlignment); - - if (ST.hasSGPRInitBug()) - MaxSGPRCount = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; - - return std::min(MaxSGPRCount - ReservedSGPRCount, AddressableSGPRCount); -} - -static unsigned getMaxWorkGroupVGPRCount(const MachineFunction &MF) { - unsigned MaxWaveCountPerSIMD = getMaxWaveCountPerSIMD(MF); - unsigned TotalVGPRCountPerSIMD = 256; - unsigned VGPRUsageAlignment = 4; - - return alignDown(TotalVGPRCountPerSIMD / MaxWaveCountPerSIMD, - 
VGPRUsageAlignment); -} - static bool hasPressureSet(const int *PSets, unsigned PSetID) { for (unsigned i = 0; PSets[i] != -1; ++i) { if (PSets[i] == (int)PSetID) @@ -119,14 +72,14 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const MachineFunction &MF) const { - unsigned BaseIdx = alignDown(getMaxWorkGroupSGPRCount(MF), 4) - 4; + unsigned BaseIdx = alignDown(getMaxNumSGPRs(MF), 4) - 4; unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); } unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const { - unsigned RegCount = getMaxWorkGroupSGPRCount(MF); + unsigned RegCount = getMaxNumSGPRs(MF); unsigned Reg; // Try to place it in a hole after PrivateSegmentbufferReg. @@ -161,18 +114,16 @@ reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9); reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11); - unsigned MaxWorkGroupSGPRCount = getMaxWorkGroupSGPRCount(MF); - unsigned MaxWorkGroupVGPRCount = getMaxWorkGroupVGPRCount(MF); - - unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); - unsigned NumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs(); - for (unsigned i = MaxWorkGroupSGPRCount; i < NumSGPRs; ++i) { + unsigned MaxNumSGPRs = getMaxNumSGPRs(MF); + unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); + for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) { unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); reserveRegisterTuples(Reserved, Reg); } - - for (unsigned i = MaxWorkGroupVGPRCount; i < NumVGPRs; ++i) { + unsigned MaxNumVGPRs = getMaxNumVGPRs(MF); + unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs(); + for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) { unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i); reserveRegisterTuples(Reserved, Reg); } @@ -194,27 +145,13 @@ assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg)); } - // Reserve registers for debugger usage if 
"amdgpu-debugger-reserve-trap-regs" - // attribute was specified. - const SISubtarget &ST = MF.getSubtarget(); - if (ST.debuggerReserveRegs()) { - unsigned ReservedVGPRFirst = - MaxWorkGroupVGPRCount - MFI->getDebuggerReservedVGPRCount(); - for (unsigned i = ReservedVGPRFirst; i < MaxWorkGroupVGPRCount; ++i) { - unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i); - reserveRegisterTuples(Reserved, Reg); - } - } - return Reserved; } unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const { - const SISubtarget &STI = MF.getSubtarget(); - // FIXME: We should adjust the max number of waves based on LDS size. - unsigned SGPRLimit = getNumSGPRsAllowed(STI, STI.getMaxWavesPerCU()); - unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU()); + unsigned SGPRLimit = getMaxNumSGPRs(MF); + unsigned VGPRLimit = getMaxNumVGPRs(MF); unsigned VSLimit = SGPRLimit + VGPRLimit; @@ -969,50 +906,197 @@ return AMDGPU::NoRegister; } -unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const { - switch(WaveCount) { - case 10: return 24; - case 9: return 28; - case 8: return 32; - case 7: return 36; - case 6: return 40; - case 5: return 48; - case 4: return 64; - case 3: return 84; - case 2: return 128; - default: return 256; +bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI, + unsigned Reg) const { + const TargetRegisterClass *RC; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + RC = MRI.getRegClass(Reg); + else + RC = getPhysRegClass(Reg); + + return hasVGPRs(RC); +} + +unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const { + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + return 800; + return 512; +} + +unsigned SIRegisterInfo::getAddressableNumSGPRs(const SISubtarget &ST) const { + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + return 102; + return 104; +} + +unsigned SIRegisterInfo::getReservedNumSGPRs(const SISubtarget &ST) const { + if (ST.getGeneration() >= 
AMDGPUSubtarget::VOLCANIC_ISLANDS) + return 6; // VCC, FLAT_SCRATCH, XNACK. + return 2; // VCC. +} + +unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST, + unsigned NumActiveWavesPerEU) const { + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + switch (NumActiveWavesPerEU) { + case 0: return 0; + case 10: return 0; + case 9: return 0; + case 8: return 81; + default: return 97; + } + } else { + switch (NumActiveWavesPerEU) { + case 0: return 0; + case 10: return 0; + case 9: return 49; + case 8: return 57; + case 7: return 65; + case 6: return 73; + case 5: return 81; + default: return 97; + } } } -unsigned SIRegisterInfo::getNumSGPRsAllowed(const SISubtarget &ST, - unsigned WaveCount) const { - if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { - switch (WaveCount) { +unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST, + unsigned NumActiveWavesPerEU) const { + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + switch (NumActiveWavesPerEU) { + case 0: return 80; case 10: return 80; case 9: return 80; case 8: return 96; - default: return 102; + default: return getAddressableNumSGPRs(ST); } } else { - switch(WaveCount) { + switch (NumActiveWavesPerEU) { + case 0: return 48; case 10: return 48; case 9: return 56; case 8: return 64; case 7: return 72; case 6: return 80; case 5: return 96; - default: return 103; + default: return getAddressableNumSGPRs(ST); } } } -bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI, - unsigned Reg) const { - const TargetRegisterClass *RC; - if (TargetRegisterInfo::isVirtualRegister(Reg)) - RC = MRI.getRegClass(Reg); - else - RC = getPhysRegClass(Reg); +unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const { + const Function &F = *MF.getFunction(); - return hasVGPRs(RC); + const SISubtarget &ST = MF.getSubtarget(); + const SIMachineFunctionInfo &MFI = *MF.getInfo(); + + // Compute maximum number of SGPRs function can use using default/requested + // 
minimum number of active waves per execution unit. + std::pair NumActiveWavesPerEU = + MFI.getNumActiveWavesPerEU(); + unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, NumActiveWavesPerEU.first); + + // Check if maximum number of SGPRs was explicitly requested using + // "amdgpu-num-sgpr" attribute. + if (F.hasFnAttribute("amdgpu-num-sgpr")) { + unsigned Requested = + AMDGPU::getIntegerAttribute(F, "amdgpu-num-sgpr", MaxNumSGPRs); + + // Make sure requested value does not violate subtarget's specifications. + if (Requested && Requested <= getReservedNumSGPRs(ST)) + Requested = 0; + + // Make sure requested value is compatible with values implied by + // default/requested minimum/maximum number of active waves per execution + // unit. + if (Requested && Requested > getMaxNumSGPRs(ST, NumActiveWavesPerEU.first)) + Requested = 0; + if (NumActiveWavesPerEU.second && + Requested && Requested < getMinNumSGPRs(ST, NumActiveWavesPerEU.second)) + Requested = 0; + + if (Requested) + MaxNumSGPRs = Requested; + } + + if (ST.hasSGPRInitBug()) + MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; + + return MaxNumSGPRs - getReservedNumSGPRs(ST); +} + +unsigned SIRegisterInfo::getDebuggerReservedNumVGPRs( + const SISubtarget &ST) const { + if (ST.debuggerReserveRegs()) + return 4; + return 0; +} + +unsigned SIRegisterInfo::getMinNumVGPRs(unsigned NumActiveWavesPerEU) const { + switch (NumActiveWavesPerEU) { + case 0: return 0; + case 10: return 0; + case 9: return 25; + case 8: return 29; + case 7: return 33; + case 6: return 37; + case 5: return 41; + case 4: return 49; + case 3: return 65; + case 2: return 85; + default: return 129; + } +} + +unsigned SIRegisterInfo::getMaxNumVGPRs(unsigned NumActiveWavesPerEU) const { + switch (NumActiveWavesPerEU) { + case 0: return 24; + case 10: return 24; + case 9: return 28; + case 8: return 32; + case 7: return 36; + case 6: return 40; + case 5: return 48; + case 4: return 64; + case 3: return 84; + case 2: return 128; + default: return 
getTotalNumVGPRs(); + } +} + +unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const { + const Function &F = *MF.getFunction(); + + const SISubtarget &ST = MF.getSubtarget(); + const SIMachineFunctionInfo &MFI = *MF.getInfo(); + + // Compute maximum number of VGPRs function can use using default/requested + // minimum number of active waves per execution unit. + std::pair NumActiveWavesPerEU = + MFI.getNumActiveWavesPerEU(); + unsigned MaxNumVGPRs = getMaxNumVGPRs(NumActiveWavesPerEU.first); + + // Check if maximum number of VGPRs was explicitly requested using + // "amdgpu-num-vgpr" attribute. + if (F.hasFnAttribute("amdgpu-num-vgpr")) { + unsigned Requested = + AMDGPU::getIntegerAttribute(F, "amdgpu-num-vgpr", MaxNumVGPRs); + + // Make sure requested value does not violate subtarget's specifications. + if (Requested && Requested <= getDebuggerReservedNumVGPRs(ST)) + Requested = 0; + + // Make sure requested value is compatible with values implied by + // default/requested minimum/maximum number of active waves per execution + // unit. + if (Requested && Requested > getMaxNumVGPRs(NumActiveWavesPerEU.first)) + Requested = 0; + if (NumActiveWavesPerEU.second && + Requested && Requested < getMinNumVGPRs(NumActiveWavesPerEU.second)) + Requested = 0; + + if (Requested) + MaxNumVGPRs = Requested; + } + + return MaxNumVGPRs - getDebuggerReservedNumVGPRs(ST); } Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -45,9 +45,28 @@ bool isGlobalSegment(const GlobalValue *GV); bool isReadOnlySegment(const GlobalValue *GV); +/// \returns Integer value requested using \p F's \p Name attribute. +/// +/// \returns \p Default if attribute is not present. +/// +/// \returns \p Default and emits error if requested value cannot be converted +/// to integer. 
int getIntegerAttribute(const Function &F, StringRef Name, int Default); -unsigned getMaximumWorkGroupSize(const Function &F); +/// \returns A pair of integer values requested using \p F's \p Name attribute +/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired +/// is false). +/// +/// \returns \p Default if attribute is not present. +/// +/// \returns \p Default and emits error if one of the requested values cannot be +/// converted to integer, or \p OnlyFirstRequired is false and "second" value is +/// not present. +std::pair<int, int> getIntegerPairAttribute(const Function &F, + StringRef Name, + std::pair<int, int> Default, + bool OnlyFirstRequired = false); + unsigned getInitialPSInputAddr(const Function &F); bool isShader(CallingConv::ID cc); Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -124,8 +124,29 @@ return Result; } -unsigned getMaximumWorkGroupSize(const Function &F) { - return getIntegerAttribute(F, "amdgpu-max-work-group-size", 256); +std::pair<int, int> getIntegerPairAttribute(const Function &F, + StringRef Name, + std::pair<int, int> Default, + bool OnlyFirstRequired) { + Attribute A = F.getFnAttribute(Name); + if (!A.isStringAttribute()) + return Default; + + LLVMContext &Ctx = F.getContext(); + std::pair<int, int> Ints = Default; + std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(','); + if (Strs.first.trim().getAsInteger(0, Ints.first)) { + Ctx.emitError("can't parse first integer attribute " + Name); + return Default; + } + if (Strs.second.trim().getAsInteger(0, Ints.second)) { + if (!OnlyFirstRequired || Strs.second.trim().size()) { + Ctx.emitError("can't parse second integer attribute " + Name); + return Default; + } + } + + return Ints; } unsigned getInitialPSInputAddr(const Function &F) { Index: test/CodeGen/AMDGPU/amdgpu.private-memory.ll 
=================================================================== --- test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -545,7 +545,7 @@ ret void } -attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="2" } +attributes #0 = { nounwind "amdgpu-num-active-waves-per-eu"="1,2" } ; HSAOPT: !0 = !{} ; HSAOPT: !1 = !{i32 0, i32 2048} Index: test/CodeGen/AMDGPU/array-ptr-calc-i32.ll =================================================================== --- test/CodeGen/AMDGPU/array-ptr-calc-i32.ll +++ test/CodeGen/AMDGPU/array-ptr-calc-i32.ll @@ -47,6 +47,6 @@ ret void } -attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="1" } +attributes #0 = { nounwind "amdgpu-num-active-waves-per-eu"="1,1" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind convergent } Index: test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll =================================================================== --- test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll +++ test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll @@ -0,0 +1,548 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s + +; CHECK-LABEL: {{^}}empty_64_64: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 1 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 1 +define void @empty_64_64() #0 { +entry: + ret void +} +attributes #0 = {"amdgpu-flat-work-group-size"="64,64"} + +; CHECK-LABEL: {{^}}empty_64_128: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 1 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 1 +define void @empty_64_128() #1 { +entry: + ret void +} +attributes #1 = {"amdgpu-flat-work-group-size"="64,128"} + +; CHECK-LABEL: {{^}}empty_128_128: +; CHECK: SGPRBlocks: 10 +; CHECK: VGPRBlocks: 7 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 81 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 29 +define void @empty_128_128() #2 { +entry: + ret void +} +attributes 
#2 = {"amdgpu-flat-work-group-size"="128,128"} + +@var = addrspace(1) global float 0.0 + +; CHECK-LABEL: {{^}}exactly_256_256: +; CHECK: SGPRBlocks: 2 +; CHECK: VGPRBlocks: 5 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 19 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 24 +define void @exactly_256_256() #3 { + %val0 = load volatile float, float addrspace(1)* @var + %val1 = load volatile float, float addrspace(1)* @var + %val2 = load volatile float, float addrspace(1)* @var + %val3 = load volatile float, float addrspace(1)* @var + %val4 = load volatile float, float addrspace(1)* @var + %val5 = load volatile float, float addrspace(1)* @var + %val6 = load volatile float, float addrspace(1)* @var + %val7 = load volatile float, float addrspace(1)* @var + %val8 = load volatile float, float addrspace(1)* @var + %val9 = load volatile float, float addrspace(1)* @var + %val10 = load volatile float, float addrspace(1)* @var + %val11 = load volatile float, float addrspace(1)* @var + %val12 = load volatile float, float addrspace(1)* @var + %val13 = load volatile float, float addrspace(1)* @var + %val14 = load volatile float, float addrspace(1)* @var + %val15 = load volatile float, float addrspace(1)* @var + %val16 = load volatile float, float addrspace(1)* @var + %val17 = load volatile float, float addrspace(1)* @var + %val18 = load volatile float, float addrspace(1)* @var + %val19 = load volatile float, float addrspace(1)* @var + %val20 = load volatile float, float addrspace(1)* @var + %val21 = load volatile float, float addrspace(1)* @var + %val22 = load volatile float, float addrspace(1)* @var + %val23 = load volatile float, float addrspace(1)* @var + %val24 = load volatile float, float addrspace(1)* @var + %val25 = load volatile float, float addrspace(1)* @var + %val26 = load volatile float, float addrspace(1)* @var + %val27 = load volatile float, float addrspace(1)* @var + %val28 = load volatile float, float addrspace(1)* @var + %val29 = load volatile float, float addrspace(1)* 
@var + %val30 = load volatile float, float addrspace(1)* @var + %val31 = load volatile float, float addrspace(1)* @var + %val32 = load volatile float, float addrspace(1)* @var + %val33 = load volatile float, float addrspace(1)* @var + %val34 = load volatile float, float addrspace(1)* @var + %val35 = load volatile float, float addrspace(1)* @var + %val36 = load volatile float, float addrspace(1)* @var + %val37 = load volatile float, float addrspace(1)* @var + %val38 = load volatile float, float addrspace(1)* @var + %val39 = load volatile float, float addrspace(1)* @var + %val40 = load volatile float, float addrspace(1)* @var + %val41 = load volatile float, float addrspace(1)* @var + %val42 = load volatile float, float addrspace(1)* @var + %val43 = load volatile float, float addrspace(1)* @var + %val44 = load volatile float, float addrspace(1)* @var + %val45 = load volatile float, float addrspace(1)* @var + %val46 = load volatile float, float addrspace(1)* @var + %val47 = load volatile float, float addrspace(1)* @var + %val48 = load volatile float, float addrspace(1)* @var + %val49 = load volatile float, float addrspace(1)* @var + %val50 = load volatile float, float addrspace(1)* @var + %val51 = load volatile float, float addrspace(1)* @var + %val52 = load volatile float, float addrspace(1)* @var + %val53 = load volatile float, float addrspace(1)* @var + %val54 = load volatile float, float addrspace(1)* @var + %val55 = load volatile float, float addrspace(1)* @var + %val56 = load volatile float, float addrspace(1)* @var + %val57 = load volatile float, float addrspace(1)* @var + %val58 = load volatile float, float addrspace(1)* @var + %val59 = load volatile float, float addrspace(1)* @var + %val60 = load volatile float, float addrspace(1)* @var + %val61 = load volatile float, float addrspace(1)* @var + %val62 = load volatile float, float addrspace(1)* @var + %val63 = load volatile float, float addrspace(1)* @var + %val64 = load volatile float, float addrspace(1)* @var 
+ %val65 = load volatile float, float addrspace(1)* @var + %val66 = load volatile float, float addrspace(1)* @var + %val67 = load volatile float, float addrspace(1)* @var + %val68 = load volatile float, float addrspace(1)* @var + %val69 = load volatile float, float addrspace(1)* @var + %val70 = load volatile float, float addrspace(1)* @var + %val71 = load volatile float, float addrspace(1)* @var + %val72 = load volatile float, float addrspace(1)* @var + %val73 = load volatile float, float addrspace(1)* @var + %val74 = load volatile float, float addrspace(1)* @var + %val75 = load volatile float, float addrspace(1)* @var + %val76 = load volatile float, float addrspace(1)* @var + %val77 = load volatile float, float addrspace(1)* @var + %val78 = load volatile float, float addrspace(1)* @var + %val79 = load volatile float, float addrspace(1)* @var + %val80 = load volatile float, float addrspace(1)* @var + %val81 = load volatile float, float addrspace(1)* @var + %val82 = load volatile float, float addrspace(1)* @var + %val83 = load volatile float, float addrspace(1)* @var + %val84 = load volatile float, float addrspace(1)* @var + %val85 = load volatile float, float addrspace(1)* @var + %val86 = load volatile float, float addrspace(1)* @var + %val87 = load volatile float, float addrspace(1)* @var + %val88 = load volatile float, float addrspace(1)* @var + %val89 = load volatile float, float addrspace(1)* @var + %val90 = load volatile float, float addrspace(1)* @var + %val91 = load volatile float, float addrspace(1)* @var + %val92 = load volatile float, float addrspace(1)* @var + %val93 = load volatile float, float addrspace(1)* @var + %val94 = load volatile float, float addrspace(1)* @var + %val95 = load volatile float, float addrspace(1)* @var + %val96 = load volatile float, float addrspace(1)* @var + %val97 = load volatile float, float addrspace(1)* @var + %val98 = load volatile float, float addrspace(1)* @var + %val99 = load volatile float, float addrspace(1)* @var + 
%val100 = load volatile float, float addrspace(1)* @var + %val101 = load volatile float, float addrspace(1)* @var + %val102 = load volatile float, float addrspace(1)* @var + %val103 = load volatile float, float addrspace(1)* @var + %val104 = load volatile float, float addrspace(1)* @var + %val105 = load volatile float, float addrspace(1)* @var + %val106 = load volatile float, float addrspace(1)* @var + %val107 = load volatile float, float addrspace(1)* @var + %val108 = load volatile float, float addrspace(1)* @var + %val109 = load volatile float, float addrspace(1)* @var + %val110 = load volatile float, float addrspace(1)* @var + %val111 = load volatile float, float addrspace(1)* @var + %val112 = load volatile float, float addrspace(1)* @var + %val113 = load volatile float, float addrspace(1)* @var + %val114 = load volatile float, float addrspace(1)* @var + %val115 = load volatile float, float addrspace(1)* @var + %val116 = load volatile float, float addrspace(1)* @var + %val117 = load volatile float, float addrspace(1)* @var + %val118 = load volatile float, float addrspace(1)* @var + %val119 = load volatile float, float addrspace(1)* @var + %val120 = load volatile float, float addrspace(1)* @var + %val121 = load volatile float, float addrspace(1)* @var + %val122 = load volatile float, float addrspace(1)* @var + %val123 = load volatile float, float addrspace(1)* @var + %val124 = load volatile float, float addrspace(1)* @var + %val125 = load volatile float, float addrspace(1)* @var + %val126 = load volatile float, float addrspace(1)* @var + %val127 = load volatile float, float addrspace(1)* @var + %val128 = load volatile float, float addrspace(1)* @var + %val129 = load volatile float, float addrspace(1)* @var + %val130 = load volatile float, float addrspace(1)* @var + %val131 = load volatile float, float addrspace(1)* @var + %val132 = load volatile float, float addrspace(1)* @var + %val133 = load volatile float, float addrspace(1)* @var + %val134 = load volatile 
float, float addrspace(1)* @var + %val135 = load volatile float, float addrspace(1)* @var + %val136 = load volatile float, float addrspace(1)* @var + %val137 = load volatile float, float addrspace(1)* @var + %val138 = load volatile float, float addrspace(1)* @var + %val139 = load volatile float, float addrspace(1)* @var + %val140 = load volatile float, float addrspace(1)* @var + %val141 = load volatile float, float addrspace(1)* @var + %val142 = load volatile float, float addrspace(1)* @var + %val143 = load volatile float, float addrspace(1)* @var + %val144 = load volatile float, float addrspace(1)* @var + %val145 = load volatile float, float addrspace(1)* @var + %val146 = load volatile float, float addrspace(1)* @var + %val147 = load volatile float, float addrspace(1)* @var + %val148 = load volatile float, float addrspace(1)* @var + %val149 = load volatile float, float addrspace(1)* @var + %val150 = load volatile float, float addrspace(1)* @var + %val151 = load volatile float, float addrspace(1)* @var + %val152 = load volatile float, float addrspace(1)* @var + %val153 = load volatile float, float addrspace(1)* @var + %val154 = load volatile float, float addrspace(1)* @var + %val155 = load volatile float, float addrspace(1)* @var + %val156 = load volatile float, float addrspace(1)* @var + %val157 = load volatile float, float addrspace(1)* @var + %val158 = load volatile float, float addrspace(1)* @var + %val159 = load volatile float, float addrspace(1)* @var + %val160 = load volatile float, float addrspace(1)* @var + %val161 = load volatile float, float addrspace(1)* @var + %val162 = load volatile float, float addrspace(1)* @var + %val163 = load volatile float, float addrspace(1)* @var + %val164 = load volatile float, float addrspace(1)* @var + %val165 = load volatile float, float addrspace(1)* @var + %val166 = load volatile float, float addrspace(1)* @var + %val167 = load volatile float, float addrspace(1)* @var + %val168 = load volatile float, float addrspace(1)* 
@var + %val169 = load volatile float, float addrspace(1)* @var + %val170 = load volatile float, float addrspace(1)* @var + %val171 = load volatile float, float addrspace(1)* @var + %val172 = load volatile float, float addrspace(1)* @var + %val173 = load volatile float, float addrspace(1)* @var + %val174 = load volatile float, float addrspace(1)* @var + %val175 = load volatile float, float addrspace(1)* @var + %val176 = load volatile float, float addrspace(1)* @var + %val177 = load volatile float, float addrspace(1)* @var + %val178 = load volatile float, float addrspace(1)* @var + %val179 = load volatile float, float addrspace(1)* @var + %val180 = load volatile float, float addrspace(1)* @var + %val181 = load volatile float, float addrspace(1)* @var + %val182 = load volatile float, float addrspace(1)* @var + %val183 = load volatile float, float addrspace(1)* @var + %val184 = load volatile float, float addrspace(1)* @var + %val185 = load volatile float, float addrspace(1)* @var + %val186 = load volatile float, float addrspace(1)* @var + %val187 = load volatile float, float addrspace(1)* @var + %val188 = load volatile float, float addrspace(1)* @var + %val189 = load volatile float, float addrspace(1)* @var + %val190 = load volatile float, float addrspace(1)* @var + %val191 = load volatile float, float addrspace(1)* @var + %val192 = load volatile float, float addrspace(1)* @var + %val193 = load volatile float, float addrspace(1)* @var + %val194 = load volatile float, float addrspace(1)* @var + %val195 = load volatile float, float addrspace(1)* @var + %val196 = load volatile float, float addrspace(1)* @var + %val197 = load volatile float, float addrspace(1)* @var + %val198 = load volatile float, float addrspace(1)* @var + %val199 = load volatile float, float addrspace(1)* @var + %val200 = load volatile float, float addrspace(1)* @var + %val201 = load volatile float, float addrspace(1)* @var + %val202 = load volatile float, float addrspace(1)* @var + %val203 = load 
volatile float, float addrspace(1)* @var + %val204 = load volatile float, float addrspace(1)* @var + %val205 = load volatile float, float addrspace(1)* @var + %val206 = load volatile float, float addrspace(1)* @var + %val207 = load volatile float, float addrspace(1)* @var + %val208 = load volatile float, float addrspace(1)* @var + %val209 = load volatile float, float addrspace(1)* @var + %val210 = load volatile float, float addrspace(1)* @var + %val211 = load volatile float, float addrspace(1)* @var + %val212 = load volatile float, float addrspace(1)* @var + %val213 = load volatile float, float addrspace(1)* @var + %val214 = load volatile float, float addrspace(1)* @var + %val215 = load volatile float, float addrspace(1)* @var + %val216 = load volatile float, float addrspace(1)* @var + %val217 = load volatile float, float addrspace(1)* @var + %val218 = load volatile float, float addrspace(1)* @var + %val219 = load volatile float, float addrspace(1)* @var + %val220 = load volatile float, float addrspace(1)* @var + %val221 = load volatile float, float addrspace(1)* @var + %val222 = load volatile float, float addrspace(1)* @var + %val223 = load volatile float, float addrspace(1)* @var + %val224 = load volatile float, float addrspace(1)* @var + %val225 = load volatile float, float addrspace(1)* @var + %val226 = load volatile float, float addrspace(1)* @var + %val227 = load volatile float, float addrspace(1)* @var + %val228 = load volatile float, float addrspace(1)* @var + %val229 = load volatile float, float addrspace(1)* @var + %val230 = load volatile float, float addrspace(1)* @var + %val231 = load volatile float, float addrspace(1)* @var + %val232 = load volatile float, float addrspace(1)* @var + %val233 = load volatile float, float addrspace(1)* @var + %val234 = load volatile float, float addrspace(1)* @var + %val235 = load volatile float, float addrspace(1)* @var + %val236 = load volatile float, float addrspace(1)* @var + %val237 = load volatile float, float 
addrspace(1)* @var + %val238 = load volatile float, float addrspace(1)* @var + %val239 = load volatile float, float addrspace(1)* @var + %val240 = load volatile float, float addrspace(1)* @var + %val241 = load volatile float, float addrspace(1)* @var + %val242 = load volatile float, float addrspace(1)* @var + %val243 = load volatile float, float addrspace(1)* @var + %val244 = load volatile float, float addrspace(1)* @var + %val245 = load volatile float, float addrspace(1)* @var + %val246 = load volatile float, float addrspace(1)* @var + %val247 = load volatile float, float addrspace(1)* @var + %val248 = load volatile float, float addrspace(1)* @var + %val249 = load volatile float, float addrspace(1)* @var + + store volatile float %val0, float addrspace(1)* @var + store volatile float %val1, float addrspace(1)* @var + store volatile float %val2, float addrspace(1)* @var + store volatile float %val3, float addrspace(1)* @var + store volatile float %val4, float addrspace(1)* @var + store volatile float %val5, float addrspace(1)* @var + store volatile float %val6, float addrspace(1)* @var + store volatile float %val7, float addrspace(1)* @var + store volatile float %val8, float addrspace(1)* @var + store volatile float %val9, float addrspace(1)* @var + store volatile float %val10, float addrspace(1)* @var + store volatile float %val11, float addrspace(1)* @var + store volatile float %val12, float addrspace(1)* @var + store volatile float %val13, float addrspace(1)* @var + store volatile float %val14, float addrspace(1)* @var + store volatile float %val15, float addrspace(1)* @var + store volatile float %val16, float addrspace(1)* @var + store volatile float %val17, float addrspace(1)* @var + store volatile float %val18, float addrspace(1)* @var + store volatile float %val19, float addrspace(1)* @var + store volatile float %val20, float addrspace(1)* @var + store volatile float %val21, float addrspace(1)* @var + store volatile float %val22, float addrspace(1)* @var + 
store volatile float %val23, float addrspace(1)* @var + store volatile float %val24, float addrspace(1)* @var + store volatile float %val25, float addrspace(1)* @var + store volatile float %val26, float addrspace(1)* @var + store volatile float %val27, float addrspace(1)* @var + store volatile float %val28, float addrspace(1)* @var + store volatile float %val29, float addrspace(1)* @var + store volatile float %val30, float addrspace(1)* @var + store volatile float %val31, float addrspace(1)* @var + store volatile float %val32, float addrspace(1)* @var + store volatile float %val33, float addrspace(1)* @var + store volatile float %val34, float addrspace(1)* @var + store volatile float %val35, float addrspace(1)* @var + store volatile float %val36, float addrspace(1)* @var + store volatile float %val37, float addrspace(1)* @var + store volatile float %val38, float addrspace(1)* @var + store volatile float %val39, float addrspace(1)* @var + store volatile float %val40, float addrspace(1)* @var + store volatile float %val41, float addrspace(1)* @var + store volatile float %val42, float addrspace(1)* @var + store volatile float %val43, float addrspace(1)* @var + store volatile float %val44, float addrspace(1)* @var + store volatile float %val45, float addrspace(1)* @var + store volatile float %val46, float addrspace(1)* @var + store volatile float %val47, float addrspace(1)* @var + store volatile float %val48, float addrspace(1)* @var + store volatile float %val49, float addrspace(1)* @var + store volatile float %val50, float addrspace(1)* @var + store volatile float %val51, float addrspace(1)* @var + store volatile float %val52, float addrspace(1)* @var + store volatile float %val53, float addrspace(1)* @var + store volatile float %val54, float addrspace(1)* @var + store volatile float %val55, float addrspace(1)* @var + store volatile float %val56, float addrspace(1)* @var + store volatile float %val57, float addrspace(1)* @var + store volatile float %val58, float 
addrspace(1)* @var + store volatile float %val59, float addrspace(1)* @var + store volatile float %val60, float addrspace(1)* @var + store volatile float %val61, float addrspace(1)* @var + store volatile float %val62, float addrspace(1)* @var + store volatile float %val63, float addrspace(1)* @var + store volatile float %val64, float addrspace(1)* @var + store volatile float %val65, float addrspace(1)* @var + store volatile float %val66, float addrspace(1)* @var + store volatile float %val67, float addrspace(1)* @var + store volatile float %val68, float addrspace(1)* @var + store volatile float %val69, float addrspace(1)* @var + store volatile float %val70, float addrspace(1)* @var + store volatile float %val71, float addrspace(1)* @var + store volatile float %val72, float addrspace(1)* @var + store volatile float %val73, float addrspace(1)* @var + store volatile float %val74, float addrspace(1)* @var + store volatile float %val75, float addrspace(1)* @var + store volatile float %val76, float addrspace(1)* @var + store volatile float %val77, float addrspace(1)* @var + store volatile float %val78, float addrspace(1)* @var + store volatile float %val79, float addrspace(1)* @var + store volatile float %val80, float addrspace(1)* @var + store volatile float %val81, float addrspace(1)* @var + store volatile float %val82, float addrspace(1)* @var + store volatile float %val83, float addrspace(1)* @var + store volatile float %val84, float addrspace(1)* @var + store volatile float %val85, float addrspace(1)* @var + store volatile float %val86, float addrspace(1)* @var + store volatile float %val87, float addrspace(1)* @var + store volatile float %val88, float addrspace(1)* @var + store volatile float %val89, float addrspace(1)* @var + store volatile float %val90, float addrspace(1)* @var + store volatile float %val91, float addrspace(1)* @var + store volatile float %val92, float addrspace(1)* @var + store volatile float %val93, float addrspace(1)* @var + store volatile 
float %val94, float addrspace(1)* @var + store volatile float %val95, float addrspace(1)* @var + store volatile float %val96, float addrspace(1)* @var + store volatile float %val97, float addrspace(1)* @var + store volatile float %val98, float addrspace(1)* @var + store volatile float %val99, float addrspace(1)* @var + store volatile float %val100, float addrspace(1)* @var + store volatile float %val101, float addrspace(1)* @var + store volatile float %val102, float addrspace(1)* @var + store volatile float %val103, float addrspace(1)* @var + store volatile float %val104, float addrspace(1)* @var + store volatile float %val105, float addrspace(1)* @var + store volatile float %val106, float addrspace(1)* @var + store volatile float %val107, float addrspace(1)* @var + store volatile float %val108, float addrspace(1)* @var + store volatile float %val109, float addrspace(1)* @var + store volatile float %val110, float addrspace(1)* @var + store volatile float %val111, float addrspace(1)* @var + store volatile float %val112, float addrspace(1)* @var + store volatile float %val113, float addrspace(1)* @var + store volatile float %val114, float addrspace(1)* @var + store volatile float %val115, float addrspace(1)* @var + store volatile float %val116, float addrspace(1)* @var + store volatile float %val117, float addrspace(1)* @var + store volatile float %val118, float addrspace(1)* @var + store volatile float %val119, float addrspace(1)* @var + store volatile float %val120, float addrspace(1)* @var + store volatile float %val121, float addrspace(1)* @var + store volatile float %val122, float addrspace(1)* @var + store volatile float %val123, float addrspace(1)* @var + store volatile float %val124, float addrspace(1)* @var + store volatile float %val125, float addrspace(1)* @var + store volatile float %val126, float addrspace(1)* @var + store volatile float %val127, float addrspace(1)* @var + store volatile float %val128, float addrspace(1)* @var + store volatile float 
%val129, float addrspace(1)* @var + store volatile float %val130, float addrspace(1)* @var + store volatile float %val131, float addrspace(1)* @var + store volatile float %val132, float addrspace(1)* @var + store volatile float %val133, float addrspace(1)* @var + store volatile float %val134, float addrspace(1)* @var + store volatile float %val135, float addrspace(1)* @var + store volatile float %val136, float addrspace(1)* @var + store volatile float %val137, float addrspace(1)* @var + store volatile float %val138, float addrspace(1)* @var + store volatile float %val139, float addrspace(1)* @var + store volatile float %val140, float addrspace(1)* @var + store volatile float %val141, float addrspace(1)* @var + store volatile float %val142, float addrspace(1)* @var + store volatile float %val143, float addrspace(1)* @var + store volatile float %val144, float addrspace(1)* @var + store volatile float %val145, float addrspace(1)* @var + store volatile float %val146, float addrspace(1)* @var + store volatile float %val147, float addrspace(1)* @var + store volatile float %val148, float addrspace(1)* @var + store volatile float %val149, float addrspace(1)* @var + store volatile float %val150, float addrspace(1)* @var + store volatile float %val151, float addrspace(1)* @var + store volatile float %val152, float addrspace(1)* @var + store volatile float %val153, float addrspace(1)* @var + store volatile float %val154, float addrspace(1)* @var + store volatile float %val155, float addrspace(1)* @var + store volatile float %val156, float addrspace(1)* @var + store volatile float %val157, float addrspace(1)* @var + store volatile float %val158, float addrspace(1)* @var + store volatile float %val159, float addrspace(1)* @var + store volatile float %val160, float addrspace(1)* @var + store volatile float %val161, float addrspace(1)* @var + store volatile float %val162, float addrspace(1)* @var + store volatile float %val163, float addrspace(1)* @var + store volatile float 
%val164, float addrspace(1)* @var + store volatile float %val165, float addrspace(1)* @var + store volatile float %val166, float addrspace(1)* @var + store volatile float %val167, float addrspace(1)* @var + store volatile float %val168, float addrspace(1)* @var + store volatile float %val169, float addrspace(1)* @var + store volatile float %val170, float addrspace(1)* @var + store volatile float %val171, float addrspace(1)* @var + store volatile float %val172, float addrspace(1)* @var + store volatile float %val173, float addrspace(1)* @var + store volatile float %val174, float addrspace(1)* @var + store volatile float %val175, float addrspace(1)* @var + store volatile float %val176, float addrspace(1)* @var + store volatile float %val177, float addrspace(1)* @var + store volatile float %val178, float addrspace(1)* @var + store volatile float %val179, float addrspace(1)* @var + store volatile float %val180, float addrspace(1)* @var + store volatile float %val181, float addrspace(1)* @var + store volatile float %val182, float addrspace(1)* @var + store volatile float %val183, float addrspace(1)* @var + store volatile float %val184, float addrspace(1)* @var + store volatile float %val185, float addrspace(1)* @var + store volatile float %val186, float addrspace(1)* @var + store volatile float %val187, float addrspace(1)* @var + store volatile float %val188, float addrspace(1)* @var + store volatile float %val189, float addrspace(1)* @var + store volatile float %val190, float addrspace(1)* @var + store volatile float %val191, float addrspace(1)* @var + store volatile float %val192, float addrspace(1)* @var + store volatile float %val193, float addrspace(1)* @var + store volatile float %val194, float addrspace(1)* @var + store volatile float %val195, float addrspace(1)* @var + store volatile float %val196, float addrspace(1)* @var + store volatile float %val197, float addrspace(1)* @var + store volatile float %val198, float addrspace(1)* @var + store volatile float 
%val199, float addrspace(1)* @var + store volatile float %val200, float addrspace(1)* @var + store volatile float %val201, float addrspace(1)* @var + store volatile float %val202, float addrspace(1)* @var + store volatile float %val203, float addrspace(1)* @var + store volatile float %val204, float addrspace(1)* @var + store volatile float %val205, float addrspace(1)* @var + store volatile float %val206, float addrspace(1)* @var + store volatile float %val207, float addrspace(1)* @var + store volatile float %val208, float addrspace(1)* @var + store volatile float %val209, float addrspace(1)* @var + store volatile float %val210, float addrspace(1)* @var + store volatile float %val211, float addrspace(1)* @var + store volatile float %val212, float addrspace(1)* @var + store volatile float %val213, float addrspace(1)* @var + store volatile float %val214, float addrspace(1)* @var + store volatile float %val215, float addrspace(1)* @var + store volatile float %val216, float addrspace(1)* @var + store volatile float %val217, float addrspace(1)* @var + store volatile float %val218, float addrspace(1)* @var + store volatile float %val219, float addrspace(1)* @var + store volatile float %val220, float addrspace(1)* @var + store volatile float %val221, float addrspace(1)* @var + store volatile float %val222, float addrspace(1)* @var + store volatile float %val223, float addrspace(1)* @var + store volatile float %val224, float addrspace(1)* @var + store volatile float %val225, float addrspace(1)* @var + store volatile float %val226, float addrspace(1)* @var + store volatile float %val227, float addrspace(1)* @var + store volatile float %val228, float addrspace(1)* @var + store volatile float %val229, float addrspace(1)* @var + store volatile float %val230, float addrspace(1)* @var + store volatile float %val231, float addrspace(1)* @var + store volatile float %val232, float addrspace(1)* @var + store volatile float %val233, float addrspace(1)* @var + store volatile float 
%val234, float addrspace(1)* @var + store volatile float %val235, float addrspace(1)* @var + store volatile float %val236, float addrspace(1)* @var + store volatile float %val237, float addrspace(1)* @var + store volatile float %val238, float addrspace(1)* @var + store volatile float %val239, float addrspace(1)* @var + store volatile float %val240, float addrspace(1)* @var + store volatile float %val241, float addrspace(1)* @var + store volatile float %val242, float addrspace(1)* @var + store volatile float %val243, float addrspace(1)* @var + store volatile float %val244, float addrspace(1)* @var + store volatile float %val245, float addrspace(1)* @var + store volatile float %val246, float addrspace(1)* @var + store volatile float %val247, float addrspace(1)* @var + store volatile float %val248, float addrspace(1)* @var + store volatile float %val249, float addrspace(1)* @var + + ret void +} +attributes #3 = { "amdgpu-flat-work-group-size"="256,256" } Index: test/CodeGen/AMDGPU/attr-amdgpu-num-active-waves-per-eu.ll =================================================================== --- test/CodeGen/AMDGPU/attr-amdgpu-num-active-waves-per-eu.ll +++ test/CodeGen/AMDGPU/attr-amdgpu-num-active-waves-per-eu.ll @@ -0,0 +1,628 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s + +; Exactly 1 active wave per execution unit. +; CHECK-LABEL: {{^}}empty_exactly_1: +; CHECK: SGPRBlocks: 12 +; CHECK: VGPRBlocks: 32 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 97 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 129 +define void @empty_exactly_1() #0 { +entry: + ret void +} +attributes #0 = {"amdgpu-num-active-waves-per-eu"="1,1"} + +; Exactly 5 active waves per execution unit. 
+; CHECK-LABEL: {{^}}empty_exactly_5: +; CHECK: SGPRBlocks: 12 +; CHECK: VGPRBlocks: 10 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 97 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 41 +define void @empty_exactly_5() #1 { +entry: + ret void +} +attributes #1 = {"amdgpu-num-active-waves-per-eu"="5,5"} + +; Exactly 10 active waves per execution unit. +; CHECK-LABEL: {{^}}empty_exactly_10: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 1 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 1 +define void @empty_exactly_10() #2 { +entry: + ret void +} +attributes #2 = {"amdgpu-num-active-waves-per-eu"="10,10" "amdgpu-flat-work-group-size"="256,256"} + +; At least 1 active wave per execution unit. +; CHECK-LABEL: {{^}}empty_at_least_1: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 1 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 1 +define void @empty_at_least_1() #3 { +entry: + ret void +} +attributes #3 = {"amdgpu-num-active-waves-per-eu"="1"} + +; At least 5 active waves per execution unit. +; CHECK-LABEL: {{^}}empty_at_least_5: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 1 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 1 +define void @empty_at_least_5() #4 { +entry: + ret void +} +attributes #4 = {"amdgpu-num-active-waves-per-eu"="5"} + +; At least 10 active waves per execution unit. +; CHECK-LABEL: {{^}}empty_at_least_10: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 1 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 1 +define void @empty_at_least_10() #5 { +entry: + ret void +} +attributes #5 = {"amdgpu-num-active-waves-per-eu"="10" "amdgpu-flat-work-group-size"="256,256"} + +; At most 1 active wave per execution unit (same as @empty_exactly_1). + +; At most 5 active waves per execution unit. 
+; CHECK-LABEL: {{^}}empty_at_most_5: +; CHECK: SGPRBlocks: 12 +; CHECK: VGPRBlocks: 10 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 97 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 41 +define void @empty_at_most_5() #6 { +entry: + ret void +} +attributes #6 = {"amdgpu-num-active-waves-per-eu"="1,5"} + +; At most 10 active waves per execution unit. +; CHECK-LABEL: {{^}}empty_at_most_10: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 1 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 1 +define void @empty_at_most_10() #7 { +entry: + ret void +} +attributes #7 = {"amdgpu-num-active-waves-per-eu"="1,10"} + +; Between 1 and 5 active waves per execution unit (same as @empty_at_most_5). + +; Between 5 and 10 active waves per execution unit. +; CHECK-LABEL: {{^}}empty_between_5_and_10: +; CHECK: SGPRBlocks: 0 +; CHECK: VGPRBlocks: 0 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 1 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 1 +define void @empty_between_5_and_10() #8 { +entry: + ret void +} +attributes #8 = {"amdgpu-num-active-waves-per-eu"="5,10"} + +@var = addrspace(1) global float 0.0 + +; Exactly 10 active waves per execution unit. 
+; CHECK-LABEL: {{^}}exactly_10: +; CHECK: SGPRBlocks: 2 +; CHECK: VGPRBlocks: 5 +; CHECK: NumSGPRsForNumActiveWavesPerEU: 19 +; CHECK: NumVGPRsForNumActiveWavesPerEU: 24 +define void @exactly_10() #9 { + %val0 = load volatile float, float addrspace(1)* @var + %val1 = load volatile float, float addrspace(1)* @var + %val2 = load volatile float, float addrspace(1)* @var + %val3 = load volatile float, float addrspace(1)* @var + %val4 = load volatile float, float addrspace(1)* @var + %val5 = load volatile float, float addrspace(1)* @var + %val6 = load volatile float, float addrspace(1)* @var + %val7 = load volatile float, float addrspace(1)* @var + %val8 = load volatile float, float addrspace(1)* @var + %val9 = load volatile float, float addrspace(1)* @var + %val10 = load volatile float, float addrspace(1)* @var + %val11 = load volatile float, float addrspace(1)* @var + %val12 = load volatile float, float addrspace(1)* @var + %val13 = load volatile float, float addrspace(1)* @var + %val14 = load volatile float, float addrspace(1)* @var + %val15 = load volatile float, float addrspace(1)* @var + %val16 = load volatile float, float addrspace(1)* @var + %val17 = load volatile float, float addrspace(1)* @var + %val18 = load volatile float, float addrspace(1)* @var + %val19 = load volatile float, float addrspace(1)* @var + %val20 = load volatile float, float addrspace(1)* @var + %val21 = load volatile float, float addrspace(1)* @var + %val22 = load volatile float, float addrspace(1)* @var + %val23 = load volatile float, float addrspace(1)* @var + %val24 = load volatile float, float addrspace(1)* @var + %val25 = load volatile float, float addrspace(1)* @var + %val26 = load volatile float, float addrspace(1)* @var + %val27 = load volatile float, float addrspace(1)* @var + %val28 = load volatile float, float addrspace(1)* @var + %val29 = load volatile float, float addrspace(1)* @var + %val30 = load volatile float, float addrspace(1)* @var + %val31 = load volatile float, float 
addrspace(1)* @var + %val32 = load volatile float, float addrspace(1)* @var + %val33 = load volatile float, float addrspace(1)* @var + %val34 = load volatile float, float addrspace(1)* @var + %val35 = load volatile float, float addrspace(1)* @var + %val36 = load volatile float, float addrspace(1)* @var + %val37 = load volatile float, float addrspace(1)* @var + %val38 = load volatile float, float addrspace(1)* @var + %val39 = load volatile float, float addrspace(1)* @var + %val40 = load volatile float, float addrspace(1)* @var + %val41 = load volatile float, float addrspace(1)* @var + %val42 = load volatile float, float addrspace(1)* @var + %val43 = load volatile float, float addrspace(1)* @var + %val44 = load volatile float, float addrspace(1)* @var + %val45 = load volatile float, float addrspace(1)* @var + %val46 = load volatile float, float addrspace(1)* @var + %val47 = load volatile float, float addrspace(1)* @var + %val48 = load volatile float, float addrspace(1)* @var + %val49 = load volatile float, float addrspace(1)* @var + %val50 = load volatile float, float addrspace(1)* @var + %val51 = load volatile float, float addrspace(1)* @var + %val52 = load volatile float, float addrspace(1)* @var + %val53 = load volatile float, float addrspace(1)* @var + %val54 = load volatile float, float addrspace(1)* @var + %val55 = load volatile float, float addrspace(1)* @var + %val56 = load volatile float, float addrspace(1)* @var + %val57 = load volatile float, float addrspace(1)* @var + %val58 = load volatile float, float addrspace(1)* @var + %val59 = load volatile float, float addrspace(1)* @var + %val60 = load volatile float, float addrspace(1)* @var + %val61 = load volatile float, float addrspace(1)* @var + %val62 = load volatile float, float addrspace(1)* @var + %val63 = load volatile float, float addrspace(1)* @var + %val64 = load volatile float, float addrspace(1)* @var + %val65 = load volatile float, float addrspace(1)* @var + %val66 = load volatile float, float 
addrspace(1)* @var + %val67 = load volatile float, float addrspace(1)* @var + %val68 = load volatile float, float addrspace(1)* @var + %val69 = load volatile float, float addrspace(1)* @var + %val70 = load volatile float, float addrspace(1)* @var + %val71 = load volatile float, float addrspace(1)* @var + %val72 = load volatile float, float addrspace(1)* @var + %val73 = load volatile float, float addrspace(1)* @var + %val74 = load volatile float, float addrspace(1)* @var + %val75 = load volatile float, float addrspace(1)* @var + %val76 = load volatile float, float addrspace(1)* @var + %val77 = load volatile float, float addrspace(1)* @var + %val78 = load volatile float, float addrspace(1)* @var + %val79 = load volatile float, float addrspace(1)* @var + %val80 = load volatile float, float addrspace(1)* @var + %val81 = load volatile float, float addrspace(1)* @var + %val82 = load volatile float, float addrspace(1)* @var + %val83 = load volatile float, float addrspace(1)* @var + %val84 = load volatile float, float addrspace(1)* @var + %val85 = load volatile float, float addrspace(1)* @var + %val86 = load volatile float, float addrspace(1)* @var + %val87 = load volatile float, float addrspace(1)* @var + %val88 = load volatile float, float addrspace(1)* @var + %val89 = load volatile float, float addrspace(1)* @var + %val90 = load volatile float, float addrspace(1)* @var + %val91 = load volatile float, float addrspace(1)* @var + %val92 = load volatile float, float addrspace(1)* @var + %val93 = load volatile float, float addrspace(1)* @var + %val94 = load volatile float, float addrspace(1)* @var + %val95 = load volatile float, float addrspace(1)* @var + %val96 = load volatile float, float addrspace(1)* @var + %val97 = load volatile float, float addrspace(1)* @var + %val98 = load volatile float, float addrspace(1)* @var + %val99 = load volatile float, float addrspace(1)* @var + %val100 = load volatile float, float addrspace(1)* @var + %val101 = load volatile float, float 
addrspace(1)* @var + %val102 = load volatile float, float addrspace(1)* @var + %val103 = load volatile float, float addrspace(1)* @var + %val104 = load volatile float, float addrspace(1)* @var + %val105 = load volatile float, float addrspace(1)* @var + %val106 = load volatile float, float addrspace(1)* @var + %val107 = load volatile float, float addrspace(1)* @var + %val108 = load volatile float, float addrspace(1)* @var + %val109 = load volatile float, float addrspace(1)* @var + %val110 = load volatile float, float addrspace(1)* @var + %val111 = load volatile float, float addrspace(1)* @var + %val112 = load volatile float, float addrspace(1)* @var + %val113 = load volatile float, float addrspace(1)* @var + %val114 = load volatile float, float addrspace(1)* @var + %val115 = load volatile float, float addrspace(1)* @var + %val116 = load volatile float, float addrspace(1)* @var + %val117 = load volatile float, float addrspace(1)* @var + %val118 = load volatile float, float addrspace(1)* @var + %val119 = load volatile float, float addrspace(1)* @var + %val120 = load volatile float, float addrspace(1)* @var + %val121 = load volatile float, float addrspace(1)* @var + %val122 = load volatile float, float addrspace(1)* @var + %val123 = load volatile float, float addrspace(1)* @var + %val124 = load volatile float, float addrspace(1)* @var + %val125 = load volatile float, float addrspace(1)* @var + %val126 = load volatile float, float addrspace(1)* @var + %val127 = load volatile float, float addrspace(1)* @var + %val128 = load volatile float, float addrspace(1)* @var + %val129 = load volatile float, float addrspace(1)* @var + %val130 = load volatile float, float addrspace(1)* @var + %val131 = load volatile float, float addrspace(1)* @var + %val132 = load volatile float, float addrspace(1)* @var + %val133 = load volatile float, float addrspace(1)* @var + %val134 = load volatile float, float addrspace(1)* @var + %val135 = load volatile float, float addrspace(1)* @var + 
%val136 = load volatile float, float addrspace(1)* @var + %val137 = load volatile float, float addrspace(1)* @var + %val138 = load volatile float, float addrspace(1)* @var + %val139 = load volatile float, float addrspace(1)* @var + %val140 = load volatile float, float addrspace(1)* @var + %val141 = load volatile float, float addrspace(1)* @var + %val142 = load volatile float, float addrspace(1)* @var + %val143 = load volatile float, float addrspace(1)* @var + %val144 = load volatile float, float addrspace(1)* @var + %val145 = load volatile float, float addrspace(1)* @var + %val146 = load volatile float, float addrspace(1)* @var + %val147 = load volatile float, float addrspace(1)* @var + %val148 = load volatile float, float addrspace(1)* @var + %val149 = load volatile float, float addrspace(1)* @var + %val150 = load volatile float, float addrspace(1)* @var + %val151 = load volatile float, float addrspace(1)* @var + %val152 = load volatile float, float addrspace(1)* @var + %val153 = load volatile float, float addrspace(1)* @var + %val154 = load volatile float, float addrspace(1)* @var + %val155 = load volatile float, float addrspace(1)* @var + %val156 = load volatile float, float addrspace(1)* @var + %val157 = load volatile float, float addrspace(1)* @var + %val158 = load volatile float, float addrspace(1)* @var + %val159 = load volatile float, float addrspace(1)* @var + %val160 = load volatile float, float addrspace(1)* @var + %val161 = load volatile float, float addrspace(1)* @var + %val162 = load volatile float, float addrspace(1)* @var + %val163 = load volatile float, float addrspace(1)* @var + %val164 = load volatile float, float addrspace(1)* @var + %val165 = load volatile float, float addrspace(1)* @var + %val166 = load volatile float, float addrspace(1)* @var + %val167 = load volatile float, float addrspace(1)* @var + %val168 = load volatile float, float addrspace(1)* @var + %val169 = load volatile float, float addrspace(1)* @var + %val170 = load volatile 
float, float addrspace(1)* @var + %val171 = load volatile float, float addrspace(1)* @var + %val172 = load volatile float, float addrspace(1)* @var + %val173 = load volatile float, float addrspace(1)* @var + %val174 = load volatile float, float addrspace(1)* @var + %val175 = load volatile float, float addrspace(1)* @var + %val176 = load volatile float, float addrspace(1)* @var + %val177 = load volatile float, float addrspace(1)* @var + %val178 = load volatile float, float addrspace(1)* @var + %val179 = load volatile float, float addrspace(1)* @var + %val180 = load volatile float, float addrspace(1)* @var + %val181 = load volatile float, float addrspace(1)* @var + %val182 = load volatile float, float addrspace(1)* @var + %val183 = load volatile float, float addrspace(1)* @var + %val184 = load volatile float, float addrspace(1)* @var + %val185 = load volatile float, float addrspace(1)* @var + %val186 = load volatile float, float addrspace(1)* @var + %val187 = load volatile float, float addrspace(1)* @var + %val188 = load volatile float, float addrspace(1)* @var + %val189 = load volatile float, float addrspace(1)* @var + %val190 = load volatile float, float addrspace(1)* @var + %val191 = load volatile float, float addrspace(1)* @var + %val192 = load volatile float, float addrspace(1)* @var + %val193 = load volatile float, float addrspace(1)* @var + %val194 = load volatile float, float addrspace(1)* @var + %val195 = load volatile float, float addrspace(1)* @var + %val196 = load volatile float, float addrspace(1)* @var + %val197 = load volatile float, float addrspace(1)* @var + %val198 = load volatile float, float addrspace(1)* @var + %val199 = load volatile float, float addrspace(1)* @var + %val200 = load volatile float, float addrspace(1)* @var + %val201 = load volatile float, float addrspace(1)* @var + %val202 = load volatile float, float addrspace(1)* @var + %val203 = load volatile float, float addrspace(1)* @var + %val204 = load volatile float, float addrspace(1)* 
@var + %val205 = load volatile float, float addrspace(1)* @var + %val206 = load volatile float, float addrspace(1)* @var + %val207 = load volatile float, float addrspace(1)* @var + %val208 = load volatile float, float addrspace(1)* @var + %val209 = load volatile float, float addrspace(1)* @var + %val210 = load volatile float, float addrspace(1)* @var + %val211 = load volatile float, float addrspace(1)* @var + %val212 = load volatile float, float addrspace(1)* @var + %val213 = load volatile float, float addrspace(1)* @var + %val214 = load volatile float, float addrspace(1)* @var + %val215 = load volatile float, float addrspace(1)* @var + %val216 = load volatile float, float addrspace(1)* @var + %val217 = load volatile float, float addrspace(1)* @var + %val218 = load volatile float, float addrspace(1)* @var + %val219 = load volatile float, float addrspace(1)* @var + %val220 = load volatile float, float addrspace(1)* @var + %val221 = load volatile float, float addrspace(1)* @var + %val222 = load volatile float, float addrspace(1)* @var + %val223 = load volatile float, float addrspace(1)* @var + %val224 = load volatile float, float addrspace(1)* @var + %val225 = load volatile float, float addrspace(1)* @var + %val226 = load volatile float, float addrspace(1)* @var + %val227 = load volatile float, float addrspace(1)* @var + %val228 = load volatile float, float addrspace(1)* @var + %val229 = load volatile float, float addrspace(1)* @var + %val230 = load volatile float, float addrspace(1)* @var + %val231 = load volatile float, float addrspace(1)* @var + %val232 = load volatile float, float addrspace(1)* @var + %val233 = load volatile float, float addrspace(1)* @var + %val234 = load volatile float, float addrspace(1)* @var + %val235 = load volatile float, float addrspace(1)* @var + %val236 = load volatile float, float addrspace(1)* @var + %val237 = load volatile float, float addrspace(1)* @var + %val238 = load volatile float, float addrspace(1)* @var + %val239 = load 
volatile float, float addrspace(1)* @var + %val240 = load volatile float, float addrspace(1)* @var + %val241 = load volatile float, float addrspace(1)* @var + %val242 = load volatile float, float addrspace(1)* @var + %val243 = load volatile float, float addrspace(1)* @var + %val244 = load volatile float, float addrspace(1)* @var + %val245 = load volatile float, float addrspace(1)* @var + %val246 = load volatile float, float addrspace(1)* @var + %val247 = load volatile float, float addrspace(1)* @var + %val248 = load volatile float, float addrspace(1)* @var + %val249 = load volatile float, float addrspace(1)* @var + + store volatile float %val0, float addrspace(1)* @var + store volatile float %val1, float addrspace(1)* @var + store volatile float %val2, float addrspace(1)* @var + store volatile float %val3, float addrspace(1)* @var + store volatile float %val4, float addrspace(1)* @var + store volatile float %val5, float addrspace(1)* @var + store volatile float %val6, float addrspace(1)* @var + store volatile float %val7, float addrspace(1)* @var + store volatile float %val8, float addrspace(1)* @var + store volatile float %val9, float addrspace(1)* @var + store volatile float %val10, float addrspace(1)* @var + store volatile float %val11, float addrspace(1)* @var + store volatile float %val12, float addrspace(1)* @var + store volatile float %val13, float addrspace(1)* @var + store volatile float %val14, float addrspace(1)* @var + store volatile float %val15, float addrspace(1)* @var + store volatile float %val16, float addrspace(1)* @var + store volatile float %val17, float addrspace(1)* @var + store volatile float %val18, float addrspace(1)* @var + store volatile float %val19, float addrspace(1)* @var + store volatile float %val20, float addrspace(1)* @var + store volatile float %val21, float addrspace(1)* @var + store volatile float %val22, float addrspace(1)* @var + store volatile float %val23, float addrspace(1)* @var + store volatile float %val24, float 
addrspace(1)* @var + store volatile float %val25, float addrspace(1)* @var + store volatile float %val26, float addrspace(1)* @var + store volatile float %val27, float addrspace(1)* @var + store volatile float %val28, float addrspace(1)* @var + store volatile float %val29, float addrspace(1)* @var + store volatile float %val30, float addrspace(1)* @var + store volatile float %val31, float addrspace(1)* @var + store volatile float %val32, float addrspace(1)* @var + store volatile float %val33, float addrspace(1)* @var + store volatile float %val34, float addrspace(1)* @var + store volatile float %val35, float addrspace(1)* @var + store volatile float %val36, float addrspace(1)* @var + store volatile float %val37, float addrspace(1)* @var + store volatile float %val38, float addrspace(1)* @var + store volatile float %val39, float addrspace(1)* @var + store volatile float %val40, float addrspace(1)* @var + store volatile float %val41, float addrspace(1)* @var + store volatile float %val42, float addrspace(1)* @var + store volatile float %val43, float addrspace(1)* @var + store volatile float %val44, float addrspace(1)* @var + store volatile float %val45, float addrspace(1)* @var + store volatile float %val46, float addrspace(1)* @var + store volatile float %val47, float addrspace(1)* @var + store volatile float %val48, float addrspace(1)* @var + store volatile float %val49, float addrspace(1)* @var + store volatile float %val50, float addrspace(1)* @var + store volatile float %val51, float addrspace(1)* @var + store volatile float %val52, float addrspace(1)* @var + store volatile float %val53, float addrspace(1)* @var + store volatile float %val54, float addrspace(1)* @var + store volatile float %val55, float addrspace(1)* @var + store volatile float %val56, float addrspace(1)* @var + store volatile float %val57, float addrspace(1)* @var + store volatile float %val58, float addrspace(1)* @var + store volatile float %val59, float addrspace(1)* @var + store volatile 
float %val60, float addrspace(1)* @var + store volatile float %val61, float addrspace(1)* @var + store volatile float %val62, float addrspace(1)* @var + store volatile float %val63, float addrspace(1)* @var + store volatile float %val64, float addrspace(1)* @var + store volatile float %val65, float addrspace(1)* @var + store volatile float %val66, float addrspace(1)* @var + store volatile float %val67, float addrspace(1)* @var + store volatile float %val68, float addrspace(1)* @var + store volatile float %val69, float addrspace(1)* @var + store volatile float %val70, float addrspace(1)* @var + store volatile float %val71, float addrspace(1)* @var + store volatile float %val72, float addrspace(1)* @var + store volatile float %val73, float addrspace(1)* @var + store volatile float %val74, float addrspace(1)* @var + store volatile float %val75, float addrspace(1)* @var + store volatile float %val76, float addrspace(1)* @var + store volatile float %val77, float addrspace(1)* @var + store volatile float %val78, float addrspace(1)* @var + store volatile float %val79, float addrspace(1)* @var + store volatile float %val80, float addrspace(1)* @var + store volatile float %val81, float addrspace(1)* @var + store volatile float %val82, float addrspace(1)* @var + store volatile float %val83, float addrspace(1)* @var + store volatile float %val84, float addrspace(1)* @var + store volatile float %val85, float addrspace(1)* @var + store volatile float %val86, float addrspace(1)* @var + store volatile float %val87, float addrspace(1)* @var + store volatile float %val88, float addrspace(1)* @var + store volatile float %val89, float addrspace(1)* @var + store volatile float %val90, float addrspace(1)* @var + store volatile float %val91, float addrspace(1)* @var + store volatile float %val92, float addrspace(1)* @var + store volatile float %val93, float addrspace(1)* @var + store volatile float %val94, float addrspace(1)* @var + store volatile float %val95, float addrspace(1)* @var 
+ store volatile float %val96, float addrspace(1)* @var + store volatile float %val97, float addrspace(1)* @var + store volatile float %val98, float addrspace(1)* @var + store volatile float %val99, float addrspace(1)* @var + store volatile float %val100, float addrspace(1)* @var + store volatile float %val101, float addrspace(1)* @var + store volatile float %val102, float addrspace(1)* @var + store volatile float %val103, float addrspace(1)* @var + store volatile float %val104, float addrspace(1)* @var + store volatile float %val105, float addrspace(1)* @var + store volatile float %val106, float addrspace(1)* @var + store volatile float %val107, float addrspace(1)* @var + store volatile float %val108, float addrspace(1)* @var + store volatile float %val109, float addrspace(1)* @var + store volatile float %val110, float addrspace(1)* @var + store volatile float %val111, float addrspace(1)* @var + store volatile float %val112, float addrspace(1)* @var + store volatile float %val113, float addrspace(1)* @var + store volatile float %val114, float addrspace(1)* @var + store volatile float %val115, float addrspace(1)* @var + store volatile float %val116, float addrspace(1)* @var + store volatile float %val117, float addrspace(1)* @var + store volatile float %val118, float addrspace(1)* @var + store volatile float %val119, float addrspace(1)* @var + store volatile float %val120, float addrspace(1)* @var + store volatile float %val121, float addrspace(1)* @var + store volatile float %val122, float addrspace(1)* @var + store volatile float %val123, float addrspace(1)* @var + store volatile float %val124, float addrspace(1)* @var + store volatile float %val125, float addrspace(1)* @var + store volatile float %val126, float addrspace(1)* @var + store volatile float %val127, float addrspace(1)* @var + store volatile float %val128, float addrspace(1)* @var + store volatile float %val129, float addrspace(1)* @var + store volatile float %val130, float addrspace(1)* @var + store 
volatile float %val131, float addrspace(1)* @var + store volatile float %val132, float addrspace(1)* @var + store volatile float %val133, float addrspace(1)* @var + store volatile float %val134, float addrspace(1)* @var + store volatile float %val135, float addrspace(1)* @var + store volatile float %val136, float addrspace(1)* @var + store volatile float %val137, float addrspace(1)* @var + store volatile float %val138, float addrspace(1)* @var + store volatile float %val139, float addrspace(1)* @var + store volatile float %val140, float addrspace(1)* @var + store volatile float %val141, float addrspace(1)* @var + store volatile float %val142, float addrspace(1)* @var + store volatile float %val143, float addrspace(1)* @var + store volatile float %val144, float addrspace(1)* @var + store volatile float %val145, float addrspace(1)* @var + store volatile float %val146, float addrspace(1)* @var + store volatile float %val147, float addrspace(1)* @var + store volatile float %val148, float addrspace(1)* @var + store volatile float %val149, float addrspace(1)* @var + store volatile float %val150, float addrspace(1)* @var + store volatile float %val151, float addrspace(1)* @var + store volatile float %val152, float addrspace(1)* @var + store volatile float %val153, float addrspace(1)* @var + store volatile float %val154, float addrspace(1)* @var + store volatile float %val155, float addrspace(1)* @var + store volatile float %val156, float addrspace(1)* @var + store volatile float %val157, float addrspace(1)* @var + store volatile float %val158, float addrspace(1)* @var + store volatile float %val159, float addrspace(1)* @var + store volatile float %val160, float addrspace(1)* @var + store volatile float %val161, float addrspace(1)* @var + store volatile float %val162, float addrspace(1)* @var + store volatile float %val163, float addrspace(1)* @var + store volatile float %val164, float addrspace(1)* @var + store volatile float %val165, float addrspace(1)* @var + store 
volatile float %val166, float addrspace(1)* @var + store volatile float %val167, float addrspace(1)* @var + store volatile float %val168, float addrspace(1)* @var + store volatile float %val169, float addrspace(1)* @var + store volatile float %val170, float addrspace(1)* @var + store volatile float %val171, float addrspace(1)* @var + store volatile float %val172, float addrspace(1)* @var + store volatile float %val173, float addrspace(1)* @var + store volatile float %val174, float addrspace(1)* @var + store volatile float %val175, float addrspace(1)* @var + store volatile float %val176, float addrspace(1)* @var + store volatile float %val177, float addrspace(1)* @var + store volatile float %val178, float addrspace(1)* @var + store volatile float %val179, float addrspace(1)* @var + store volatile float %val180, float addrspace(1)* @var + store volatile float %val181, float addrspace(1)* @var + store volatile float %val182, float addrspace(1)* @var + store volatile float %val183, float addrspace(1)* @var + store volatile float %val184, float addrspace(1)* @var + store volatile float %val185, float addrspace(1)* @var + store volatile float %val186, float addrspace(1)* @var + store volatile float %val187, float addrspace(1)* @var + store volatile float %val188, float addrspace(1)* @var + store volatile float %val189, float addrspace(1)* @var + store volatile float %val190, float addrspace(1)* @var + store volatile float %val191, float addrspace(1)* @var + store volatile float %val192, float addrspace(1)* @var + store volatile float %val193, float addrspace(1)* @var + store volatile float %val194, float addrspace(1)* @var + store volatile float %val195, float addrspace(1)* @var + store volatile float %val196, float addrspace(1)* @var + store volatile float %val197, float addrspace(1)* @var + store volatile float %val198, float addrspace(1)* @var + store volatile float %val199, float addrspace(1)* @var + store volatile float %val200, float addrspace(1)* @var + store 
volatile float %val201, float addrspace(1)* @var + store volatile float %val202, float addrspace(1)* @var + store volatile float %val203, float addrspace(1)* @var + store volatile float %val204, float addrspace(1)* @var + store volatile float %val205, float addrspace(1)* @var + store volatile float %val206, float addrspace(1)* @var + store volatile float %val207, float addrspace(1)* @var + store volatile float %val208, float addrspace(1)* @var + store volatile float %val209, float addrspace(1)* @var + store volatile float %val210, float addrspace(1)* @var + store volatile float %val211, float addrspace(1)* @var + store volatile float %val212, float addrspace(1)* @var + store volatile float %val213, float addrspace(1)* @var + store volatile float %val214, float addrspace(1)* @var + store volatile float %val215, float addrspace(1)* @var + store volatile float %val216, float addrspace(1)* @var + store volatile float %val217, float addrspace(1)* @var + store volatile float %val218, float addrspace(1)* @var + store volatile float %val219, float addrspace(1)* @var + store volatile float %val220, float addrspace(1)* @var + store volatile float %val221, float addrspace(1)* @var + store volatile float %val222, float addrspace(1)* @var + store volatile float %val223, float addrspace(1)* @var + store volatile float %val224, float addrspace(1)* @var + store volatile float %val225, float addrspace(1)* @var + store volatile float %val226, float addrspace(1)* @var + store volatile float %val227, float addrspace(1)* @var + store volatile float %val228, float addrspace(1)* @var + store volatile float %val229, float addrspace(1)* @var + store volatile float %val230, float addrspace(1)* @var + store volatile float %val231, float addrspace(1)* @var + store volatile float %val232, float addrspace(1)* @var + store volatile float %val233, float addrspace(1)* @var + store volatile float %val234, float addrspace(1)* @var + store volatile float %val235, float addrspace(1)* @var + store 
volatile float %val236, float addrspace(1)* @var + store volatile float %val237, float addrspace(1)* @var + store volatile float %val238, float addrspace(1)* @var + store volatile float %val239, float addrspace(1)* @var + store volatile float %val240, float addrspace(1)* @var + store volatile float %val241, float addrspace(1)* @var + store volatile float %val242, float addrspace(1)* @var + store volatile float %val243, float addrspace(1)* @var + store volatile float %val244, float addrspace(1)* @var + store volatile float %val245, float addrspace(1)* @var + store volatile float %val246, float addrspace(1)* @var + store volatile float %val247, float addrspace(1)* @var + store volatile float %val248, float addrspace(1)* @var + store volatile float %val249, float addrspace(1)* @var + + ret void +} +attributes #9 = { "amdgpu-num-active-waves-per-eu"="10,10" "amdgpu-flat-work-group-size"="256,256" } Index: test/CodeGen/AMDGPU/attr-unparseable.ll =================================================================== --- test/CodeGen/AMDGPU/attr-unparseable.ll +++ test/CodeGen/AMDGPU/attr-unparseable.ll @@ -0,0 +1,57 @@ +; RUN: not llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s 2>&1 | FileCheck %s + +; CHECK: can't parse integer attribute amdgpu-num-sgpr +define void @unparseable_single_0() #0 { +entry: + ret void +} +attributes #0 = { "amdgpu-num-sgpr" } + +; CHECK: can't parse integer attribute amdgpu-num-sgpr +define void @unparseable_single_1() #1 { +entry: + ret void +} +attributes #1 = { "amdgpu-num-sgpr"="k" } + +; CHECK: can't parse integer attribute amdgpu-num-sgpr +define void @unparseable_single_2() #2 { +entry: + ret void +} +attributes #2 = { "amdgpu-num-sgpr"="1,2" } + +; CHECK: can't parse first integer attribute amdgpu-flat-work-group-size +define void @unparseable_pair_0() #3 { +entry: + ret void +} +attributes #3 = { "amdgpu-flat-work-group-size" } + +; CHECK: can't parse first integer attribute amdgpu-flat-work-group-size +define void 
@unparseable_pair_1() #4 { +entry: + ret void +} +attributes #4 = { "amdgpu-flat-work-group-size"="k" } + +; CHECK: can't parse second integer attribute amdgpu-flat-work-group-size +define void @unparseable_pair_2() #5 { +entry: + ret void +} +attributes #5 = { "amdgpu-flat-work-group-size"="1" } + +; CHECK: can't parse second integer attribute amdgpu-flat-work-group-size +define void @unparseable_pair_3() #6 { +entry: + ret void +} +attributes #6 = { "amdgpu-flat-work-group-size"="1,k" } + +; CHECK: can't parse second integer attribute amdgpu-flat-work-group-size +define void @unparseable_pair_4() #7 { +entry: + ret void +} +attributes #7 = { "amdgpu-flat-work-group-size"="1,2,3" } Index: test/CodeGen/AMDGPU/indirect-private-64.ll =================================================================== --- test/CodeGen/AMDGPU/indirect-private-64.ll +++ test/CodeGen/AMDGPU/indirect-private-64.ll @@ -121,4 +121,4 @@ } attributes #0 = { convergent nounwind } -attributes #1 = { nounwind "amdgpu-max-waves-per-eu"="2" "amdgpu-max-work-group-size"="64" } +attributes #1 = { nounwind "amdgpu-num-active-waves-per-eu"="2,2" "amdgpu-flat-work-group-size"="64,64" } Index: test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll =================================================================== --- test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll +++ test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll @@ -254,11 +254,11 @@ ret void } -attributes #0 = { nounwind "amdgpu-max-work-group-size"="63" } -attributes #1 = { nounwind "amdgpu-max-waves-per-eu"="3" "amdgpu-max-work-group-size"="256" } -attributes #2 = { nounwind "amdgpu-max-waves-per-eu"="1" "amdgpu-max-work-group-size"="1600" } -attributes #3 = { nounwind "amdgpu-max-waves-per-eu"="0" } -attributes #4 = { nounwind "amdgpu-max-waves-per-eu"="-1" } -attributes #5 = { nounwind "amdgpu-max-waves-per-eu"="6" "amdgpu-max-work-group-size"="64" } -attributes #6 = { nounwind "amdgpu-max-waves-per-eu"="8" 
"amdgpu-max-work-group-size"="64" } -attributes #7 = { nounwind "amdgpu-max-waves-per-eu"="9" "amdgpu-max-work-group-size"="64" } +attributes #0 = { nounwind "amdgpu-flat-work-group-size"="63,63" } +attributes #1 = { nounwind "amdgpu-num-active-waves-per-eu"="1,3" "amdgpu-flat-work-group-size"="256,256" } +attributes #2 = { nounwind "amdgpu-num-active-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1600,1600" } +attributes #3 = { nounwind "amdgpu-num-active-waves-per-eu"="1,10" } +attributes #4 = { nounwind "amdgpu-num-active-waves-per-eu"="1,10" } +attributes #5 = { nounwind "amdgpu-num-active-waves-per-eu"="1,6" "amdgpu-flat-work-group-size"="64,64" } +attributes #6 = { nounwind "amdgpu-num-active-waves-per-eu"="1,8" "amdgpu-flat-work-group-size"="64,64" } +attributes #7 = { nounwind "amdgpu-num-active-waves-per-eu"="1,9" "amdgpu-flat-work-group-size"="64,64" } Index: test/CodeGen/AMDGPU/large-work-group-registers.ll =================================================================== --- test/CodeGen/AMDGPU/large-work-group-registers.ll +++ test/CodeGen/AMDGPU/large-work-group-registers.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -post-RA-scheduler=0 < %s | FileCheck %s -; CHECK: NumVgprs: 64 +; CHECK: NumVgprs: 32 define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, <3 x i32> inreg, <3 x i32> inreg, <3 x i32>) #0 { main_body: %8 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %4, i64 0, i64 8 @@ -33,7 +33,7 @@ declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #2 -attributes #0 = { "amdgpu-max-work-group-size"="1024" } +attributes #0 = { "amdgpu-flat-work-group-size"="1024,1024" } attributes #1 = { convergent nounwind } attributes #2 = { nounwind } Index: test/CodeGen/AMDGPU/load-constant-i16.ll 
=================================================================== --- test/CodeGen/AMDGPU/load-constant-i16.ll +++ test/CodeGen/AMDGPU/load-constant-i16.ll @@ -260,8 +260,8 @@ ; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i32: ; GCN-DAG: s_load_dwordx16 ; GCN-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}} -; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[K]] -; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16 +; GCN-DAG: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16 +; GCN-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[K]] define void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 { %load = load <32 x i16>, <32 x i16> addrspace(2)* %in %ext = zext <32 x i16> %load to <32 x i32> Index: test/CodeGen/AMDGPU/load-local-i16.ll =================================================================== --- test/CodeGen/AMDGPU/load-local-i16.ll +++ test/CodeGen/AMDGPU/load-local-i16.ll @@ -267,10 +267,10 @@ ; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32: ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3 +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:6 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 -define void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:7 offset1:3 +define void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #1 { %load = load <32 x i16>, <32 x i16> addrspace(3)* %in %ext = zext <32 x i16> %load to <32 x i32> store <32 x i32> %ext, <32 x i32> addrspace(3)* %out @@ -282,25 +282,23 @@ ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:3 offset1:4 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, 
v{{[0-9]+}} offset0:5{{$}} ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 -define void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { +define void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #1 { %load = load <32 x i16>, <32 x i16> addrspace(3)* %in %ext = sext <32 x i16> %load to <32 x i32> store <32 x i32> %ext, <32 x i32> addrspace(3)* %out ret void } -; FIXME: Missed read2 ; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32: -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:11 offset1:15 +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:15 offset1:12 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 -; GCN-DAG: ds_read_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:64 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:9 offset1:10 -; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:12 offset1:13 -; GCN-DAG: ds_read_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:112 -define void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:8 offset1:9 +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:10 offset1:11 +; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:13 offset1:14 +define void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #1 { %load = load <64 x i16>, <64 x i16> addrspace(3)* %in %ext = zext <64 x i16> %load to <64 x i32> store <64 x i32> %ext, <64 x i32> addrspace(3)* %out @@ -452,3 +450,4 @@ ; } 
attributes #0 = { nounwind } +attributes #1 = { nounwind "target-features"="-promote-alloca" "amdgpu-num-active-waves-per-eu"="2" } Index: test/CodeGen/AMDGPU/private-memory-r600.ll =================================================================== --- test/CodeGen/AMDGPU/private-memory-r600.ll +++ test/CodeGen/AMDGPU/private-memory-r600.ll @@ -297,4 +297,4 @@ ; OPT: !0 = !{i32 0, i32 2048} -attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="2" } +attributes #0 = { nounwind "amdgpu-num-active-waves-per-eu"="1,2" } Index: test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll +++ test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll @@ -61,5 +61,5 @@ ret void } -attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" "amdgpu-max-waves-per-eu"="3" } +attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-num-active-waves-per-eu"="3,3" } attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/promote-alloca-no-opts.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-no-opts.ll +++ test/CodeGen/AMDGPU/promote-alloca-no-opts.ll @@ -34,5 +34,5 @@ ret void } -attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" } -attributes #1 = { nounwind optnone noinline "amdgpu-max-work-group-size"="64" } +attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" } +attributes #1 = { nounwind optnone noinline "amdgpu-flat-work-group-size"="64,64" } Index: test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll +++ test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll @@ -127,4 +127,4 @@ ret void } -attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" } +attributes #0 = { nounwind 
"amdgpu-flat-work-group-size"="64,64" "amdgpu-num-active-waves-per-eu"="1,7" } Index: test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll +++ test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll @@ -61,4 +61,4 @@ declare i32* @get_unknown_pointer() #0 -attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="1" } +attributes #0 = { nounwind "amdgpu-num-active-waves-per-eu"="1,1" } Index: test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll +++ test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll @@ -201,4 +201,4 @@ declare i32* @get_unknown_pointer() #0 -attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="1" } +attributes #0 = { nounwind "amdgpu-num-active-waves-per-eu"="1,1" } Index: test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll +++ test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll @@ -129,5 +129,5 @@ ret void } -attributes #0 = { norecurse nounwind "amdgpu-max-waves-per-eu"="1" } +attributes #0 = { norecurse nounwind "amdgpu-num-active-waves-per-eu"="1,1" } attributes #1 = { norecurse nounwind } \ No newline at end of file Index: test/CodeGen/AMDGPU/target-cpu.ll =================================================================== --- test/CodeGen/AMDGPU/target-cpu.ll +++ test/CodeGen/AMDGPU/target-cpu.ll @@ -108,5 +108,5 @@ attributes #2 = { nounwind "target-cpu"="tahiti" } attributes #3 = { nounwind "target-cpu"="bonaire" } attributes #4 = { nounwind "target-cpu"="fiji" } -attributes #5 = { nounwind "target-features"="+promote-alloca" "amdgpu-max-waves-per-eu"="3" } -attributes #6 = { nounwind "target-features"="-promote-alloca" "amdgpu-max-waves-per-eu"="3" } +attributes #5 = { nounwind 
"target-features"="+promote-alloca" "amdgpu-num-active-waves-per-eu"="1,3" } +attributes #6 = { nounwind "target-features"="-promote-alloca" "amdgpu-num-active-waves-per-eu"="1,3" }