Index: llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -151,6 +151,26 @@ return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()}; } + std::pair + getWavesPerEU(const Function &F, + std::pair FlatWorkGroupSize) { + const GCNSubtarget &ST = TM.getSubtarget(F); + return ST.getWavesPerEU(F, FlatWorkGroupSize); + } + + std::pair + getEffectiveWavesPerEU(const Function &F, + std::pair WavesPerEU, + std::pair FlatWorkGroupSize) { + const GCNSubtarget &ST = TM.getSubtarget(F); + return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize); + } + + unsigned getMaxWavesPerEU(const Function &F) { + const GCNSubtarget &ST = TM.getSubtarget(F); + return ST.getMaxWavesPerEU(); + } + private: /// Check if the ConstantExpr \p CE requires queue ptr attribute. static bool visitConstExpr(const ConstantExpr *CE) { @@ -503,35 +523,33 @@ llvm_unreachable("AAAMDAttributes is only valid for function position"); } -/// Propagate amdgpu-flat-work-group-size attribute. -struct AAAMDFlatWorkGroupSize +struct AAAMDSizeRangeAttribute : public StateWrapper { using Base = StateWrapper; - AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A) - : Base(IRP, 32) {} + + StringRef AttrName; + + AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A, + StringRef AttrName) + : Base(IRP, 32), AttrName(AttrName) {} /// See AbstractAttribute::getState(...). IntegerRangeState &getState() override { return *this; } const IntegerRangeState &getState() const override { return *this; } - void initialize(Attributor &A) override { - Function *F = getAssociatedFunction(); - auto &InfoCache = static_cast(A.getInfoCache()); - unsigned MinGroupSize, MaxGroupSize; - std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F); - intersectKnown( - ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1))); - } + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override {} - ChangeStatus updateImpl(Attributor &A) override { + template + ChangeStatus updateImplImpl(Attributor &A) { ChangeStatus Change = ChangeStatus::UNCHANGED; auto CheckCallSite = [&](AbstractCallSite CS) { Function *Caller = CS.getInstruction()->getFunction(); - LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName() + LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName() << "->" << getAssociatedFunction()->getName() << '\n'); - const auto &CallerInfo = A.getAAFor( + const auto &CallerInfo = A.getAAFor( *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); Change |= @@ -547,25 +565,20 @@ return Change; } - ChangeStatus manifest(Attributor &A) override { - SmallVector AttrList; - Function *F = getAssociatedFunction(); - LLVMContext &Ctx = F->getContext(); - - auto &InfoCache = static_cast(A.getInfoCache()); - unsigned Min, Max; - std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F); - + ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min, + unsigned Max) { // Don't add the attribute if it's the implied default. if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max) return ChangeStatus::UNCHANGED; + Function *F = getAssociatedFunction(); + LLVMContext &Ctx = F->getContext(); + SmallVector AttrList; SmallString<10> Buffer; raw_svector_ostream OS(Buffer); OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; - AttrList.push_back( - Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str())); + AttrList.push_back(Attribute::get(Ctx, AttrName, OS.str())); return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList, /* ForceReplace */ true); } @@ -573,19 +586,43 @@ const std::string getAsStr() const override { std::string Str; raw_string_ostream OS(Str); - OS << "AMDFlatWorkGroupSize["; + OS << getName() << '['; OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1; OS << ']'; return OS.str(); } +}; - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override {} +/// Propagate amdgpu-flat-work-group-size attribute. +struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute { + AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A) + : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {} + + void initialize(Attributor &A) override { + Function *F = getAssociatedFunction(); + auto &InfoCache = static_cast(A.getInfoCache()); + unsigned MinGroupSize, MaxGroupSize; + std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F); + intersectKnown( + ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1))); + } + + ChangeStatus updateImpl(Attributor &A) override { + return updateImplImpl(A); + } /// Create an abstract attribute view for the position \p IRP. static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP, Attributor &A); + ChangeStatus manifest(Attributor &A) override { + Function *F = getAssociatedFunction(); + auto &InfoCache = static_cast(A.getInfoCache()); + unsigned Min, Max; + std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F); + return emitAttributeIfNotDefault(A, Min, Max); + } + /// See AbstractAttribute::getName() const std::string getName() const override { return "AAAMDFlatWorkGroupSize"; @@ -615,6 +652,106 @@ "AAAMDFlatWorkGroupSize is only valid for function position"); } +/// Propagate amdgpu-waves-per-eu attribute. +struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { + AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A) + : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {} + + bool isValidState() const override { + return !Assumed.isEmptySet() && IntegerRangeState::isValidState(); + } + + void initialize(Attributor &A) override { + Function *F = getAssociatedFunction(); + + const AAAMDFlatWorkGroupSize &FlatWorkGroupSize = + A.getAAFor(*this, IRPosition::function(*F), + DepClassTy::REQUIRED); + + const auto &AssumedFlatWorkGroup = FlatWorkGroupSize.getAssumed(); + + auto &InfoCache = static_cast(A.getInfoCache()); + unsigned Min, Max; + std::tie(Min, Max) = InfoCache.getWavesPerEU( + *F, {AssumedFlatWorkGroup.getLower().getZExtValue(), + AssumedFlatWorkGroup.getUpper().getZExtValue() - 1}); + + ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); + intersectKnown(Range); + } + + ChangeStatus updateImpl(Attributor &A) override { + auto &InfoCache = static_cast(A.getInfoCache()); + ChangeStatus Change = ChangeStatus::UNCHANGED; + + auto CheckCallSite = [&](AbstractCallSite CS) { + Function *Caller = CS.getInstruction()->getFunction(); + LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName() + << "->" << getAssociatedFunction()->getName() << '\n'); + + const auto &CallerInfo = A.getAAFor( + *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); + const auto &CallerGroupSize = A.getAAFor( + *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); + + unsigned Min, Max; + std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU( + *Caller, + {CallerInfo.getAssumed().getLower().getZExtValue(), + CallerInfo.getAssumed().getUpper().getZExtValue() - 1}, + {CallerGroupSize.getAssumed().getLower().getZExtValue(), + CallerGroupSize.getAssumed().getUpper().getZExtValue() - 1}); + ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1)); + IntegerRangeState CallerRangeState(CallerRange); + Change |= + clampStateAndIndicateChange(this->getState(), CallerRangeState); + + return true; + }; + + bool AllCallSitesKnown = true; + if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown)) + indicatePessimisticFixpoint(); + + return Change; + } + + /// Create an abstract attribute view for the position \p IRP. + static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP, + Attributor &A); + + ChangeStatus manifest(Attributor &A) override { + Function *F = getAssociatedFunction(); + auto &InfoCache = static_cast(A.getInfoCache()); + unsigned Max = InfoCache.getMaxWavesPerEU(*F); + return emitAttributeIfNotDefault(A, 1, Max); + } + + /// See AbstractAttribute::getName() + const std::string getName() const override { return "AAAMDWavesPerEU"; } + + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAAMDWavesPerEU + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + + /// Unique ID (due to the unique address) + static const char ID; +}; + +const char AAAMDWavesPerEU::ID = 0; + +AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP, + Attributor &A) { + if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION) + return *new (A.Allocator) AAAMDWavesPerEU(IRP, A); + llvm_unreachable("AAAMDWavesPerEU is only valid for function position"); +} + class AMDGPUAttributor : public ModulePass { public: AMDGPUAttributor() : ModulePass(ID) {} @@ -643,7 +780,7 @@ AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM); DenseSet Allowed( {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, - &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID}); + &AAAMDFlatWorkGroupSize::ID, &AAAMDWavesPerEU::ID, &AACallEdges::ID}); Attributor A(Functions, InfoCache, CGUpdater, &Allowed); @@ -653,6 +790,7 @@ A.getOrCreateAAFor(IRPosition::function(F)); if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) { A.getOrCreateAAFor(IRPosition::function(F)); + A.getOrCreateAAFor(IRPosition::function(F)); } } } Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -103,6 +103,9 @@ std::pair getWavesPerEU(const Function &F, std::pair FlatWorkGroupSizes) const; + std::pair + getEffectiveWavesPerEU(std::pair WavesPerEU, + std::pair FlatWorkGroupSizes) const; /// Return the amount of LDS that can be used that will not restrict the /// occupancy lower than WaveCount. Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -532,8 +532,9 @@ return Requested; } -std::pair AMDGPUSubtarget::getWavesPerEU( - const Function &F, std::pair FlatWorkGroupSizes) const { +std::pair AMDGPUSubtarget::getEffectiveWavesPerEU( + std::pair Requested, + std::pair FlatWorkGroupSizes) const { // Default minimum/maximum number of waves per execution unit. std::pair Default(1, getMaxWavesPerEU()); @@ -545,10 +546,6 @@ getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second); Default.first = MinImpliedByFlatWorkGroupSize; - // Requested minimum/maximum number of waves per execution unit. - std::pair Requested = AMDGPU::getIntegerPairAttribute( - F, "amdgpu-waves-per-eu", Default, true); - // Make sure requested minimum is less than requested maximum. if (Requested.second && Requested.first > Requested.second) return Default; @@ -566,6 +563,17 @@ return Requested; } +std::pair AMDGPUSubtarget::getWavesPerEU( + const Function &F, std::pair FlatWorkGroupSizes) const { + // Default minimum/maximum number of waves per execution unit. + std::pair Default(1, getMaxWavesPerEU()); + + // Requested minimum/maximum number of waves per execution unit. + std::pair Requested = AMDGPU::getIntegerPairAttribute( + F, "amdgpu-waves-per-eu", Default, true); + return getEffectiveWavesPerEU(Requested, FlatWorkGroupSizes); +} + static unsigned getReqdWorkGroupSize(const Function &Kernel, unsigned Dim) { auto Node = Kernel.getMetadata("reqd_work_group_size"); if (Node && Node->getNumOperands() == 3) Index: llvm/lib/Target/AMDGPU/GCNSubtarget.h =================================================================== --- llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -30,10 +30,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, public AMDGPUSubtarget { - +public: using AMDGPUSubtarget::getMaxWavesPerEU; -public: // Following 2 enums are documented at: // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi enum class TrapHandlerAbi { Index: llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -217,7 +217,7 @@ ; AKF_HSA-NEXT: ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group -; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR3:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) ; ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) @@ -232,4 +232,5 @@ ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll +++ llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll @@ -117,14 +117,14 @@ ret void } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-queue-ptr" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-queue-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -611,7 +611,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr -; ATTRIBUTOR_HSA-SAME: () #[[ATTR14]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR15:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() ; ATTRIBUTOR_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 ; ATTRIBUTOR_HSA-NEXT: ret void @@ -628,7 +628,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr -; ATTRIBUTOR_HSA-SAME: () #[[ATTR14]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] { ; ATTRIBUTOR_HSA-NEXT: call void @use_implicitarg_ptr() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -645,7 +645,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@defined.func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR16:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -658,7 +658,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_external -; ATTRIBUTOR_HSA-SAME: () #[[ATTR15:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR16:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @external.func() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -673,7 +673,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_defined -; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { ; ATTRIBUTOR_HSA-NEXT: call void @defined.func() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -687,8 +687,8 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm -; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { -; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR20:[0-9]+]] +; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { +; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR24:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: ret void ; call void asm sideeffect "", ""() #3 @@ -702,7 +702,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_external -; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @external.func() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -717,7 +717,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_kern_defined -; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { ; ATTRIBUTOR_HSA-NEXT: call void @defined.func() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -769,7 +769,7 @@ ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_call -; ATTRIBUTOR_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR15]] { +; ATTRIBUTOR_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR16]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]() ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -788,7 +788,7 @@ ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_extern_call -; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float @extern() ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -806,7 +806,7 @@ ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_null_call -; ATTRIBUTOR_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR15]] { +; ATTRIBUTOR_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR16]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float null() ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -827,7 +827,7 @@ ; AKF_HSA-NEXT: ret float [[FADD]] ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_other_intrinsic_call -; ATTRIBUTOR_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR16]] { +; ATTRIBUTOR_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR17]] { ; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[ARG]]) ; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] @@ -845,7 +845,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, i32 addrspace(1)* null, align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -861,7 +861,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, i32 addrspace(1)* null, align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -877,7 +877,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -893,7 +893,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -912,7 +912,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_decl_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] { ; ATTRIBUTOR_HSA-NEXT: call void @extern_func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -937,24 +937,28 @@ ; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } -; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind } +; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR23:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { nounwind } ;. Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -628,7 +628,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR12:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) ; ATTRIBUTOR_HSA-NEXT: store i32 0, i32 addrspace(5)* [[ALLOCA]], align 4 ; ATTRIBUTOR_HSA-NEXT: ret void @@ -658,4 +658,5 @@ ; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -35,6 +35,6 @@ ret void } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -42,6 +42,6 @@ ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll +++ llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll @@ -204,11 +204,11 @@ ;. ; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll @@ -0,0 +1,289 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s + +; Check propagation of amdgpu-flat-work-group-size attribute. + +; Called from a single kernel with 1,8 +define internal void @default_to_1_8_a() { +; CHECK-LABEL: define {{[^@]+}}@default_to_1_8_a +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} + +define amdgpu_kernel void @kernel_1_8() #0 { +; CHECK-LABEL: define {{[^@]+}}@kernel_1_8 +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: call void @default_to_1_8_a() +; CHECK-NEXT: ret void +; + call void @default_to_1_8_a() + ret void +} + +; Called from a single kernel with 1,2 +define internal void @default_to_1_2() { +; CHECK-LABEL: define {{[^@]+}}@default_to_1_2 +; CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} + +define amdgpu_kernel void @kernel_1_2() #1 { +; CHECK-LABEL: define {{[^@]+}}@kernel_1_2 +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: call void @default_to_1_2() +; CHECK-NEXT: call void @flat_group_1_1() +; CHECK-NEXT: call void @default_to_1_8_b() +; CHECK-NEXT: call void @flat_group_2_8() +; CHECK-NEXT: ret void +; + call void @default_to_1_2() + call void @flat_group_1_1() + call void @default_to_1_8_b() + call void @flat_group_2_8() + ret void +} + +; Called from kernels with 2,9 and 9,9 +define internal void @default_to_2_9() { +; CHECK-LABEL: define {{[^@]+}}@default_to_2_9 +; CHECK-SAME: () #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} + +; This already has a strict bounds, but called from kernels with wider +; bounds, and should not be changed. +define internal void @flat_group_1_1() #2 { +; CHECK-LABEL: define {{[^@]+}}@flat_group_1_1 +; CHECK-SAME: () #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} + +; 2,8 -> 2,2 +define internal void @flat_group_2_8() #3 { +; CHECK-LABEL: define {{[^@]+}}@flat_group_2_8 +; CHECK-SAME: () #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} + +; 9,10 -> 9,9 +define internal void @flat_group_9_10() #4 { +; CHECK-LABEL: define {{[^@]+}}@flat_group_9_10 +; CHECK-SAME: () #[[ATTR5:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} + +define amdgpu_kernel void @kernel_2_9() #5 { +; CHECK-LABEL: define {{[^@]+}}@kernel_2_9 +; CHECK-SAME: () #[[ATTR2]] { +; CHECK-NEXT: call void @default_to_2_9() +; CHECK-NEXT: call void @flat_group_1_1() +; CHECK-NEXT: ret void +; + call void @default_to_2_9() + call void @flat_group_1_1() + ret void +} + +define amdgpu_kernel void @kernel_9_9() #6 { +; CHECK-LABEL: define {{[^@]+}}@kernel_9_9 +; CHECK-SAME: () #[[ATTR5]] { +; CHECK-NEXT: call void @default_to_2_9() +; CHECK-NEXT: call void @flat_group_9_10() +; CHECK-NEXT: ret void +; + call void @default_to_2_9() + call void @flat_group_9_10() + ret void +} + +; Called from kernels with 2,8 and 1,2 => 1,8 +define internal void @default_to_1_8_b() { +; CHECK-LABEL: define {{[^@]+}}@default_to_1_8_b +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: ret void +; + ret void +} + +; The kernel's lower bound is higher than the callee's lower bound, so +; this should probably be illegal. +define amdgpu_kernel void @kernel_2_8() #3 { +; CHECK-LABEL: define {{[^@]+}}@kernel_2_8 +; CHECK-SAME: () #[[ATTR6:[0-9]+]] { +; CHECK-NEXT: call void @default_to_1_8_a() +; CHECK-NEXT: call void @default_to_1_8_b() +; CHECK-NEXT: ret void +; + call void @default_to_1_8_a() + call void @default_to_1_8_b() + ret void +} + +; 1,2 -> 1,2 +define internal void @merge_cycle_0() #1 { +; CHECK-LABEL: define {{[^@]+}}@merge_cycle_0 +; CHECK-SAME: () #[[ATTR4]] { +; CHECK-NEXT: call void @merge_cycle_1() +; CHECK-NEXT: ret void +; + call void @merge_cycle_1() + ret void +} + +; Called from 1,2 + 3,8 +; 2,8 -> 2,8 +define internal void @merge_cycle_1() #3 { +; CHECK-LABEL: define {{[^@]+}}@merge_cycle_1 +; CHECK-SAME: () #[[ATTR6]] { +; CHECK-NEXT: call void @merge_cycle_0() +; CHECK-NEXT: ret void +; + call void @merge_cycle_0() + ret void +} + +define amdgpu_kernel void @kernel_3_8() #7 { +; CHECK-LABEL: define {{[^@]+}}@kernel_3_8 +; CHECK-SAME: () #[[ATTR7:[0-9]+]] { +; CHECK-NEXT: call void @merge_cycle_0() +; CHECK-NEXT: call void @default_captured_address() +; CHECK-NEXT: call void @externally_visible_default() +; CHECK-NEXT: [[F32:%.*]] = call float bitcast (i32 ()* @bitcasted_function to float ()*)() +; CHECK-NEXT: ret void +; + call void @merge_cycle_0() + call void @default_captured_address() + call void @externally_visible_default() + %f32 = call float bitcast (i32 ()* @bitcasted_function to float ()*)() + ret void +} + +define internal void @default_captured_address() { +; CHECK-LABEL: define {{[^@]+}}@default_captured_address +; CHECK-SAME: () #[[ATTR8:[0-9]+]] { +; CHECK-NEXT: store volatile void ()* @default_captured_address, void ()** undef, align 8 +; CHECK-NEXT: ret void +; + store volatile void ()* @default_captured_address, void ()** undef, align 8 + ret void +} + +define void @externally_visible_default() { +; CHECK-LABEL: define {{[^@]+}}@externally_visible_default +; CHECK-SAME: () #[[ATTR8]] { +; CHECK-NEXT: ret void +; + ret void +} + +; 1,10 -> 3,8 +define internal i32 @bitcasted_function() { +; CHECK-LABEL: define {{[^@]+}}@bitcasted_function +; CHECK-SAME: () #[[ATTR7]] { +; CHECK-NEXT: ret i32 0 +; + ret i32 0 +} + +define internal void @called_from_invalid_bounds_0() { +; CHECK-LABEL: define {{[^@]+}}@called_from_invalid_bounds_0 +; CHECK-SAME: () #[[ATTR9:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} + +define internal void @called_from_invalid_bounds_1() { +; CHECK-LABEL: define {{[^@]+}}@called_from_invalid_bounds_1 +; CHECK-SAME: () #[[ATTR9]] { +; CHECK-NEXT: ret void +; + ret void +} + +; Invalid range for amdgpu-waves-per-eu +define amdgpu_kernel void @kernel_invalid_bounds_0_8() #8 { +; CHECK-LABEL: define {{[^@]+}}@kernel_invalid_bounds_0_8 +; CHECK-SAME: () #[[ATTR10:[0-9]+]] { +; CHECK-NEXT: call void @called_from_invalid_bounds_0() +; CHECK-NEXT: ret void +; + call void @called_from_invalid_bounds_0() + ret void +} + +; Invalid range for amdgpu-waves-per-eu +define amdgpu_kernel void @kernel_invalid_bounds_1_123() #9 { +; CHECK-LABEL: define {{[^@]+}}@kernel_invalid_bounds_1_123 +; CHECK-SAME: () #[[ATTR11:[0-9]+]] { +; CHECK-NEXT: call void @called_from_invalid_bounds_1() +; CHECK-NEXT: ret void +; + call void @called_from_invalid_bounds_1() + ret void +} + +; XXX - Why is the maximum not 6? +; The 512 maximum workgroup size implies a minimum occupancy of 2. The +; implied minimim waves-per-eu should not be 3 +define void @larger_group_size_implies_lower_minimum() #10 { +; CHECK-LABEL: define {{[^@]+}}@larger_group_size_implies_lower_minimum +; CHECK-SAME: () #[[ATTR12:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} + +define amdgpu_kernel void @kernel_3_6() #11 { +; CHECK-LABEL: define {{[^@]+}}@kernel_3_6 +; CHECK-SAME: () #[[ATTR13:[0-9]+]] { +; CHECK-NEXT: call void @larger_group_size_implies_lower_minimum() +; CHECK-NEXT: ret void +; + call void @larger_group_size_implies_lower_minimum() + ret void +} + +; Use a 1 wave workgroup so there is no interaction by the workgroup +; size on the implied waves per EU. +attributes #0 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,8" } +attributes #1 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" } +attributes #2 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,1" } +attributes #3 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,8" } +attributes #4 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,10" } +attributes #5 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,9" } +attributes #6 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,9" } +attributes #7 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="3,8" } +attributes #8 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="0,8" } +attributes #9 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,123" } +attributes #10 = { "amdgpu-flat-work-group-size"="1,512" } +attributes #11 = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-waves-per-eu"="3,6" } + +;. +; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,8" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,2" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,2" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR9]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR10]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="0,8" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" } +;. Index: llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -73,6 +73,6 @@ ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll +++ llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll @@ -21,7 +21,7 @@ define amdgpu_kernel void @kernel1() #1 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 -; CHECK-SAME: () #[[ATTR0]] { +; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: ret void ; @@ -31,5 +31,6 @@ attributes #0 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll +++ llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll @@ -43,7 +43,7 @@ define internal void @internal3() { ; CHECK-LABEL: define {{[^@]+}}@internal3 -; CHECK-SAME: () #[[ATTR1]] { +; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] @@ -67,7 +67,7 @@ define internal void @internal4() { ; CHECK-LABEL: define {{[^@]+}}@internal4 -; CHECK-SAME: () #[[ATTR1]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: store i32 1, i32* @G2, align 4 ; CHECK-NEXT: ret void ; @@ -77,7 +77,7 @@ define internal void @internal2() { ; CHECK-LABEL: define {{[^@]+}}@internal2 -; CHECK-SAME: () #[[ATTR1]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: call void @internal3() ; CHECK-NEXT: ret void ; @@ -87,7 +87,7 @@ define amdgpu_kernel void @kernel2() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 -; CHECK-SAME: () #[[ATTR1]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: call void @internal2() ; CHECK-NEXT: ret void ; @@ -97,6 +97,7 @@ attributes #0 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" } ;. Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll @@ -41,6 +41,6 @@ attributes #2 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll +++ llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll @@ -31,7 +31,7 @@ define amdgpu_kernel void @kernel2() #2 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 -; CHECK-SAME: () #[[ATTR0]] { +; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: call void @func() ; CHECK-NEXT: ret void ; @@ -41,6 +41,7 @@ attributes #1 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -101,7 +101,7 @@ attributes #0 = { nounwind readnone } attributes #1 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" } ;. Index: llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll +++ llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll @@ -49,7 +49,7 @@ define amdgpu_kernel void @kernel3() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel3 -; CHECK-SAME: () #[[ATTR0]] { +; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: call void @func2() ; CHECK-NEXT: call void @func3() ; CHECK-NEXT: ret void @@ -61,5 +61,6 @@ attributes #0 = { "uniform-work-group-size"="false" } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;.