diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h --- a/llvm/include/llvm/MC/MCSubtargetInfo.h +++ b/llvm/include/llvm/MC/MCSubtargetInfo.h @@ -230,6 +230,10 @@ return Found != ProcDesc.end() && StringRef(Found->Key) == CPU; } + ArrayRef<SubtargetSubTypeKV> getAllProcessorDescriptions() const { + return ProcDesc; + } + virtual unsigned getHwMode() const { return 0; } /// Return the cache size in bytes for the given level of cache. diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -49,6 +49,7 @@ FunctionPass *createSIPostRABundlerPass(); FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *); FunctionPass *createAMDGPUUseNativeCallsPass(); +ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); FunctionPass *createAMDGPUCodeGenPreparePass(); FunctionPass *createAMDGPULateCodeGenPreparePass(); FunctionPass *createAMDGPUMachineCFGStructurizerPass(); @@ -288,6 +289,9 @@ void initializeAMDGPUCodeGenPreparePass(PassRegistry&); extern char &AMDGPUCodeGenPrepareID; +void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); +extern char &AMDGPURemoveIncompatibleFunctionsID; + void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); extern char &AMDGPULateCodeGenPrepareID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp @@ -0,0 +1,189 @@ +//===-- AMDGPURemoveIncompatibleFunctions.cpp -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass replaces all uses of functions that use GPU features +/// incompatible with the current GPU with null then deletes the function. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "amdgpu-remove-incompatible-functions" + +using namespace llvm; + +namespace llvm { +extern const SubtargetFeatureKV + AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures - 1]; +} + +namespace { + +using Generation = AMDGPUSubtarget::Generation; + +class AMDGPURemoveIncompatibleFunctions : public ModulePass { +public: + static char ID; + + AMDGPURemoveIncompatibleFunctions(const TargetMachine *TM = nullptr) + : ModulePass(ID), TM(TM) { + assert(TM && "No TargetMachine!"); + } + + StringRef getPassName() const override { + return "AMDGPU Remove Incompatible Functions"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override {} + + /// Checks a single function, returns true if the function must be deleted. 
+ bool checkFunction(Function &F); + + bool runOnModule(Module &M) override { + assert(TM->getTargetTriple().isAMDGCN()); + + SmallVector<Function *, 4> FnsToDelete; + for (Function &F : M) { + if (checkFunction(F)) + FnsToDelete.push_back(&F); + } + + for (Function *F : FnsToDelete) { + F->replaceAllUsesWith(ConstantPointerNull::get(F->getType())); + F->eraseFromParent(); + } + return !FnsToDelete.empty(); + } + +private: + const TargetMachine *TM = nullptr; +}; + +StringRef getFeatureName(unsigned Feature) { + for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) + if (Feature == KV.Value) + return KV.Key; + + llvm_unreachable("Unknown Target feature"); +} + +const SubtargetSubTypeKV *getGPUInfo(const GCNSubtarget &ST, + StringRef GPUName) { + for (const SubtargetSubTypeKV &KV : ST.getAllProcessorDescriptions()) + if (StringRef(KV.Key) == GPUName) + return &KV; + + return nullptr; +} + +constexpr unsigned FeaturesToCheck[] = { + AMDGPU::FeatureGFX11Insts, AMDGPU::FeatureGFX10Insts, + AMDGPU::FeatureGFX9Insts, AMDGPU::FeatureGFX8Insts, + AMDGPU::FeatureDPP, AMDGPU::Feature16BitInsts, + AMDGPU::FeatureDot1Insts, AMDGPU::FeatureDot2Insts, + AMDGPU::FeatureDot3Insts, AMDGPU::FeatureDot4Insts, + AMDGPU::FeatureDot5Insts, AMDGPU::FeatureDot6Insts, + AMDGPU::FeatureDot7Insts, AMDGPU::FeatureDot8Insts, +}; + +FeatureBitset expandImpliedFeatures(const FeatureBitset &Features) { + FeatureBitset Result = Features; + for (const SubtargetFeatureKV &FE : AMDGPUFeatureKV) { + if (Features.test(FE.Value) && FE.Implies.any()) + Result |= expandImpliedFeatures(FE.Implies.getAsBitset()); + } + return Result; +} + +static int DK_IncompatibleFn = getNextAvailablePluginDiagnosticKind(); + +struct DiagnosticInfoRemovingIncompatibleFunction + : public DiagnosticInfoWithLocationBase { + DiagnosticInfoRemovingIncompatibleFunction(Function &F, Twine M) + : DiagnosticInfoWithLocationBase(DiagnosticKind(DK_IncompatibleFn), + DS_Remark, F, DiagnosticLocation()), + Msg(M.str()) {} + + void 
print(DiagnosticPrinter &DP) const override { + DP << getFunction().getName() << ": removing function: " << Msg; + } + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_IncompatibleFn; + } + + std::string Msg; +}; + +} // end anonymous namespace + +bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) { + if (F.isDeclaration()) + return false; + + const GCNSubtarget *ST = + static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F)); + + // Check the GPU isn't generic. Generic is used for testing only + // and we don't want this pass to interfere with it. + StringRef GPUName = ST->getCPU(); + if (GPUName.empty() || GPUName.contains("generic")) + return false; + + // Try to fetch the GPU's info. If we can't, it's likely an unknown processor + // so just bail out. + const SubtargetSubTypeKV *GPUInfo = getGPUInfo(*ST, GPUName); + if (!GPUInfo) + return false; + + LLVMContext &Ctx = F.getContext(); + + // Get all the features implied by the current GPU, and recursively expand + // the features that imply other features. + // + // e.g. GFX90A implies FeatureGFX9, and FeatureGFX9 implies a whole set of + // other features. + const FeatureBitset GPUFeatureBits = + expandImpliedFeatures(GPUInfo->Implies.getAsBitset()); + + // Now that we have a FeatureBitset containing all possible features for + // the chosen GPU, check our list of "suspicious" features. + + // Check that the user didn't enable any features that aren't part of that + // GPU's feature set. We only check a predetermined set of features. 
+ for (unsigned Feature : FeaturesToCheck) { + if (ST->hasFeature(Feature) && !GPUFeatureBits.test(Feature)) { + DiagnosticInfoRemovingIncompatibleFunction DiagInfo( + F, "+" + getFeatureName(Feature) + + " is not supported on the current target"); + Ctx.diagnose(DiagInfo); + return true; + } + } + + return false; +} + +INITIALIZE_PASS(AMDGPURemoveIncompatibleFunctions, DEBUG_TYPE, + "AMDGPU Remove Incompatible Functions", false, false) + +char AMDGPURemoveIncompatibleFunctions::ID = 0; + +ModulePass * +llvm::createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *TM) { + return new AMDGPURemoveIncompatibleFunctions(TM); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -216,6 +216,12 @@ cl::init(false), cl::Hidden); +static cl::opt<bool> RemoveIncompatibleFunctions( + "amdgpu-enable-remove-incompatible-functions", cl::Hidden, + cl::desc("Enable removal of functions when they " + "use features not supported by the target GPU"), + cl::init(true)); + static cl::opt<bool> EnableSDWAPeephole( "amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), @@ -380,6 +386,7 @@ initializeAMDGPULateCodeGenPreparePass(*PR); initializeAMDGPUPropagateAttributesEarlyPass(*PR); initializeAMDGPUPropagateAttributesLatePass(*PR); + initializeAMDGPURemoveIncompatibleFunctionsPass(*PR); initializeAMDGPUReplaceLDSUseWithPointerPass(*PR); initializeAMDGPULowerModuleLDSPass(*PR); initializeAMDGPURewriteOutArgumentsPass(*PR); @@ -1040,6 +1047,9 @@ void AMDGPUPassConfig::addCodeGenPrepare() { if (TM->getTargetTriple().getArch() == Triple::amdgcn) { + if (RemoveIncompatibleFunctions) + addPass(createAMDGPURemoveIncompatibleFunctionsPass(TM)); + addPass(createAMDGPUAttributorPass()); // FIXME: This pass adds 2 hacky attributes that can be replaced with an @@ -1066,6 +1076,7 @@ bool AMDGPUPassConfig::addPreISel() { if 
(TM->getOptLevel() > CodeGenOpt::None) addPass(createFlattenCFGPass()); + return false; } diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -90,6 +90,7 @@ AMDGPURegBankSelect.cpp AMDGPURegisterBankInfo.cpp AMDGPUReleaseVGPRs.cpp + AMDGPURemoveIncompatibleFunctions.cpp AMDGPUReplaceLDSUseWithPointer.cpp AMDGPUResourceUsageAnalysis.cpp AMDGPURewriteOutArguments.cpp diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -stop-after=legalizer -o - %s | FileCheck %s +; RUN: llc -global-isel -amdgpu-enable-remove-incompatible-functions=0 -mtriple=amdgcn-amd-amdhsa -stop-after=legalizer -o - %s | FileCheck %s ; Make sure legalizer info doesn't assert on dummy targets diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -50,6 +50,7 @@ ; GCN-O0-NEXT: Expand vector predication intrinsics ; GCN-O0-NEXT: Scalarize Masked Memory Intrinsics ; GCN-O0-NEXT: Expand reduction intrinsics +; GCN-O0-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O0-NEXT: AMDGPU Attributor ; GCN-O0-NEXT: FunctionPass Manager ; GCN-O0-NEXT: Cycle Info Analysis @@ -231,6 +232,7 @@ ; GCN-O1-NEXT: Expand reduction intrinsics ; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: TLS Variable Hoist +; GCN-O1-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O1-NEXT: AMDGPU Attributor ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Cycle Info Analysis @@ -522,6 +524,7 @@ ; GCN-O1-OPTS-NEXT: Natural Loop Information ; 
GCN-O1-OPTS-NEXT: TLS Variable Hoist ; GCN-O1-OPTS-NEXT: Early CSE +; GCN-O1-OPTS-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O1-OPTS-NEXT: AMDGPU Attributor ; GCN-O1-OPTS-NEXT: FunctionPass Manager ; GCN-O1-OPTS-NEXT: Cycle Info Analysis @@ -827,6 +830,7 @@ ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: TLS Variable Hoist ; GCN-O2-NEXT: Early CSE +; GCN-O2-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O2-NEXT: AMDGPU Attributor ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Cycle Info Analysis @@ -1145,6 +1149,7 @@ ; GCN-O3-NEXT: Lazy Block Frequency Analysis ; GCN-O3-NEXT: Optimization Remark Emitter ; GCN-O3-NEXT: Global Value Numbering +; GCN-O3-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O3-NEXT: AMDGPU Attributor ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Cycle Info Analysis diff --git a/llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll b/llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/remove-incompatible-functions.ll @@ -0,0 +1,446 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=bonaire -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX7,IR %s +; RUN: FileCheck --check-prefix=WARN-GFX7 %s < %t + +; RUN: llc -march=amdgcn -mcpu=fiji -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX8,IR %s +; RUN: FileCheck --check-prefix=WARN-GFX8 %s < %t + +; RUN: llc -march=amdgcn -mcpu=gfx906 -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX9,GFX906,IR %s +; RUN: FileCheck --check-prefix=WARN-GFX906 %s < %t + +; RUN: llc -march=amdgcn -mcpu=gfx90a -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX9,GFX90A,IR %s +; RUN: FileCheck --check-prefix=WARN-GFX90A %s < %t + +; RUN: llc -march=amdgcn -mcpu=gfx1011 
-stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX10,IR %s +; RUN: FileCheck --check-prefix=WARN-GFX10 %s < %t + +; RUN: llc -march=amdgcn -mcpu=gfx1100 -stop-after=amdgpu-remove-incompatible-functions < %s 2>%t | FileCheck -check-prefixes=GFX11,IR %s +; RUN: FileCheck --check-prefix=WARN-GFX11 %s < %t + +; WARN-GFX7: needs_dpp: removing function: +dpp is not supported on the current target +; WARN-GFX7: needs_16bit_insts: removing function: +16-bit-insts is not supported on the current target +; WARN-GFX7: needs_gfx8_insts: removing function: +gfx8-insts is not supported on the current target +; WARN-GFX7: needs_gfx9_insts: removing function: +gfx9-insts is not supported on the current target +; WARN-GFX7: needs_gfx10_insts: removing function: +gfx10-insts is not supported on the current target +; WARN-GFX7: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target +; WARN-GFX7: needs_dot1_insts: removing function: +dot1-insts is not supported on the current target +; WARN-GFX7: needs_dot2_insts: removing function: +dot2-insts is not supported on the current target +; WARN-GFX7: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target +; WARN-GFX7: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target +; WARN-GFX7: needs_dot5_insts: removing function: +dot5-insts is not supported on the current target +; WARN-GFX7: needs_dot6_insts: removing function: +dot6-insts is not supported on the current target +; WARN-GFX7: needs_dot7_insts: removing function: +dot7-insts is not supported on the current target +; WARN-GFX7: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target +; WARN-GFX7-NOT: not supported + +; WARN-GFX8: needs_gfx9_insts: removing function: +gfx9-insts is not supported on the current target +; WARN-GFX8: needs_gfx10_insts: removing function: +gfx10-insts is not supported on the current 
target +; WARN-GFX8: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target +; WARN-GFX8: needs_dot1_insts: removing function: +dot1-insts is not supported on the current target +; WARN-GFX8: needs_dot2_insts: removing function: +dot2-insts is not supported on the current target +; WARN-GFX8: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target +; WARN-GFX8: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target +; WARN-GFX8: needs_dot5_insts: removing function: +dot5-insts is not supported on the current target +; WARN-GFX8: needs_dot6_insts: removing function: +dot6-insts is not supported on the current target +; WARN-GFX8: needs_dot7_insts: removing function: +dot7-insts is not supported on the current target +; WARN-GFX8: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target +; WARN-GFX8-NOT: not supported + +; WARN-GFX906: needs_gfx10_insts: removing function: +gfx10-insts is not supported on the current target +; WARN-GFX906: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target +; WARN-GFX906: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target +; WARN-GFX906: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target +; WARN-GFX906: needs_dot5_insts: removing function: +dot5-insts is not supported on the current target +; WARN-GFX906: needs_dot6_insts: removing function: +dot6-insts is not supported on the current target +; WARN-GFX906: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target +; WARN-GFX906-NOT: not supported + +; WARN-GFX90A: needs_gfx10_insts: removing function: +gfx10-insts is not supported on the current target +; WARN-GFX90A: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target +; WARN-GFX90A: needs_dot8_insts: removing function: 
+dot8-insts is not supported on the current target +; WARN-GFX90A-NOT: not supported + +; WARN-GFX10: needs_gfx11_insts: removing function: +gfx11-insts is not supported on the current target +; WARN-GFX10: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target +; WARN-GFX10: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target +; WARN-GFX10: needs_dot8_insts: removing function: +dot8-insts is not supported on the current target +; WARN-GFX10-NOT: not supported + +; WARN-GFX11: needs_dot1_insts: removing function: +dot1-insts is not supported on the current target +; WARN-GFX11: needs_dot2_insts: removing function: +dot2-insts is not supported on the current target +; WARN-GFX11: needs_dot3_insts: removing function: +dot3-insts is not supported on the current target +; WARN-GFX11: needs_dot4_insts: removing function: +dot4-insts is not supported on the current target +; WARN-GFX11: needs_dot6_insts: removing function: +dot6-insts is not supported on the current target +; WARN-GFX11-NOT: not supported + +; GFX7: @GVRefs {{.*}} zeroinitializer +; GFX8: @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null] +; GFX906: @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr @needs_gfx9_insts, ptr null, ptr null, ptr @needs_dot1_insts, ptr @needs_dot2_insts, ptr null, ptr null, ptr null, ptr null, ptr @needs_dot7_insts, ptr null] +; GFX90A: @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr @needs_gfx9_insts, ptr null, ptr null, ptr @needs_dot1_insts, ptr @needs_dot2_insts, ptr @needs_dot3_insts, ptr @needs_dot4_insts, ptr @needs_dot5_insts, ptr @needs_dot6_insts, ptr @needs_dot7_insts, ptr null] +; GFX10: @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr @needs_gfx9_insts, ptr 
@needs_gfx10_insts, ptr null, ptr @needs_dot1_insts, ptr @needs_dot2_insts, ptr null, ptr null, ptr @needs_dot5_insts, ptr @needs_dot6_insts, ptr @needs_dot7_insts, ptr null] +; GFX11: @GVRefs {{.*}} [ptr @needs_dpp, ptr @needs_16bit_insts, ptr @needs_gfx8_insts, ptr @needs_gfx9_insts, ptr @needs_gfx10_insts, ptr @needs_gfx11_insts, ptr null, ptr null, ptr null, ptr null, ptr @needs_dot5_insts, ptr null, ptr @needs_dot7_insts, ptr @needs_dot8_insts] +@GVRefs = internal global [14 x ptr] [ + ptr @needs_dpp, + ptr @needs_16bit_insts, + ptr @needs_gfx8_insts, + ptr @needs_gfx9_insts, + ptr @needs_gfx10_insts, + ptr @needs_gfx11_insts, + ptr @needs_dot1_insts, + ptr @needs_dot2_insts, + ptr @needs_dot3_insts, + ptr @needs_dot4_insts, + ptr @needs_dot5_insts, + ptr @needs_dot6_insts, + ptr @needs_dot7_insts, + ptr @needs_dot8_insts +] + +; GFX7: @ConstantExpr = internal global i64 0 +@ConstantExpr = internal global i64 ptrtoint (ptr @needs_dpp to i64) + +define void @needs_dpp(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #0 { +; GFX7-NOT: define void @needs_dpp( +; GFX8: define void @needs_dpp( +; GFX9: define void @needs_dpp( +; GFX10: define void @needs_dpp( +; GFX11: define void @needs_dpp( +entry: + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %if, label %else + +if: + %ld = load i64, ptr %in + br label %endif + +else: + %add = add i64 %a, %b + br label %endif + +endif: + %phi = phi i64 [%ld, %if], [%add, %else] + store i64 %phi, ptr %out + ret void +} + +define void @needs_16bit_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #1 { +; GFX7-NOT: define void @needs_16bit_insts( +; GFX8: define void @needs_16bit_insts( +; GFX9: define void @needs_16bit_insts( +; GFX10: define void @needs_16bit_insts( +; GFX11: define void @needs_16bit_insts( +entry: + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %if, label %else + +if: + %ld = load i64, ptr %in + br label %endif + +else: + %add = add i64 %a, %b + br label %endif + +endif: + %phi = phi i64 [%ld, %if], [%add, %else] + 
store i64 %phi, ptr %out + ret void +} + +define void @needs_gfx8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #2 { +; GFX7-NOT: define void @needs_gfx8_insts( +; GFX8: define void @needs_gfx8_insts( +; GFX9: define void @needs_gfx8_insts( +; GFX10: define void @needs_gfx8_insts( +; GFX11: define void @needs_gfx8_insts( +entry: + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %if, label %else + +if: + %ld = load i64, ptr %in + br label %endif + +else: + %add = add i64 %a, %b + br label %endif + +endif: + %phi = phi i64 [%ld, %if], [%add, %else] + store i64 %phi, ptr %out + ret void +} + +define void @needs_gfx9_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #3 { +; GFX7-NOT: define void @needs_gfx9_insts( +; GFX8-NOT: define void @needs_gfx9_insts( +; GFX9: define void @needs_gfx9_insts( +; GFX10: define void @needs_gfx9_insts( +; GFX11: define void @needs_gfx9_insts( +entry: + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %if, label %else + +if: + %ld = load i64, ptr %in + br label %endif + +else: + %add = add i64 %a, %b + br label %endif + +endif: + %phi = phi i64 [%ld, %if], [%add, %else] + store i64 %phi, ptr %out + ret void +} + +define void @needs_gfx10_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #4 { +; GFX7-NOT: define void @needs_gfx10_insts( +; GFX8-NOT: define void @needs_gfx10_insts( +; GFX9-NOT: define void @needs_gfx10_insts( +; GFX10: define void @needs_gfx10_insts( +; GFX11: define void @needs_gfx10_insts( +entry: + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %if, label %else + +if: + %ld = load i64, ptr %in + br label %endif + +else: + %add = add i64 %a, %b + br label %endif + +endif: + %phi = phi i64 [%ld, %if], [%add, %else] + store i64 %phi, ptr %out + ret void +} + +define void @needs_gfx11_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #5 { +; GFX7-NOT: define void @needs_gfx11_insts( +; GFX8-NOT: define void @needs_gfx11_insts( +; GFX9-NOT: define void @needs_gfx11_insts( +; GFX10-NOT: define void @needs_gfx11_insts( +; GFX11: 
define void @needs_gfx11_insts( +entry: + %cmp = icmp eq i64 %a, 0 + br i1 %cmp, label %if, label %else + +if: + %ld = load i64, ptr %in + br label %endif + +else: + %add = add i64 %a, %b + br label %endif + +endif: + %phi = phi i64 [%ld, %if], [%add, %else] + store i64 %phi, ptr %out + ret void +} + +define void @needs_dot1_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #6 { +; GFX7-NOT: define void @needs_dot1_insts( +; GFX8-NOT: define void @needs_dot1_insts( +; GFX9: define void @needs_dot1_insts( +; GFX10: define void @needs_dot1_insts( +; GFX11-NOT: define void @needs_dot1_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + +define void @needs_dot2_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #7 { +; GFX7-NOT: define void @needs_dot2_insts( +; GFX8-NOT: define void @needs_dot2_insts( +; GFX9: define void @needs_dot2_insts( +; GFX10: define void @needs_dot2_insts( +; GFX11-NOT: define void @needs_dot2_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + +define void @needs_dot3_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #8 { +; GFX7-NOT: define void @needs_dot3_insts( +; GFX8-NOT: define void @needs_dot3_insts( +; GFX906-NOT: define void @needs_dot3_insts( +; GFX90A: define void @needs_dot3_insts( +; GFX10-NOT: define void @needs_dot3_insts( +; GFX11-NOT: define void @needs_dot3_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + + +define void @needs_dot4_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #9 { +; GFX7-NOT: define void @needs_dot4_insts( +; GFX8-NOT: define void @needs_dot4_insts( +; GFX906-NOT: define void @needs_dot4_insts( +; GFX90A: define void @needs_dot4_insts( +; GFX10-NOT: define void @needs_dot4_insts( +; GFX11-NOT: define void @needs_dot4_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + +define void @needs_dot5_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #10 { +; GFX7-NOT: define void @needs_dot5_insts( +; GFX8-NOT: define void 
@needs_dot5_insts( +; GFX906-NOT: define void @needs_dot5_insts( +; GFX90A: define void @needs_dot5_insts( +; GFX10: define void @needs_dot5_insts( +; GFX11: define void @needs_dot5_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + +define void @needs_dot6_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #11 { +; GFX7-NOT: define void @needs_dot6_insts( +; GFX8-NOT: define void @needs_dot6_insts( +; GFX906-NOT: define void @needs_dot6_insts( +; GFX90A: define void @needs_dot6_insts( +; GFX10: define void @needs_dot6_insts( +; GFX11-NOT: define void @needs_dot6_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + +define void @needs_dot7_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #12 { +; GFX7-NOT: define void @needs_dot7_insts( +; GFX8-NOT: define void @needs_dot7_insts( +; GFX9: define void @needs_dot7_insts( +; GFX10: define void @needs_dot7_insts( +; GFX11: define void @needs_dot7_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + +define void @needs_dot8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) #13 { +; GFX7-NOT: define void @needs_dot8_insts( +; GFX8-NOT: define void @needs_dot8_insts( +; GFX9-NOT: define void @needs_dot8_insts( +; GFX10-NOT: define void @needs_dot8_insts( +; GFX11: define void @needs_dot8_insts( + %add = add i64 %a, %b + store i64 %add, ptr %out + ret void +} + +; IR: define void @caller( +define void @caller(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) { + ; GFX7: call void null( + ; GFX8: call void @needs_dpp( + ; GFX9: call void @needs_dpp( + ; GFX10: call void @needs_dpp( + ; GFX11: call void @needs_dpp( + call void @needs_dpp(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void @needs_16bit_insts( + ; GFX9: call void @needs_16bit_insts( + ; GFX10: call void @needs_16bit_insts( + ; GFX11: call void @needs_16bit_insts( + call void @needs_16bit_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; 
GFX8: call void @needs_gfx8_insts( + ; GFX9: call void @needs_gfx8_insts( + ; GFX10: call void @needs_gfx8_insts( + ; GFX11: call void @needs_gfx8_insts( + call void @needs_gfx8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void @needs_gfx9_insts( + ; GFX10: call void @needs_gfx9_insts( + ; GFX11: call void @needs_gfx9_insts( + call void @needs_gfx9_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void null( + ; GFX10: call void @needs_gfx10_insts( + ; GFX11: call void @needs_gfx10_insts( + call void @needs_gfx10_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void null( + ; GFX10: call void null( + ; GFX11: call void @needs_gfx11_insts( + call void @needs_gfx11_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void @needs_dot1_insts( + ; GFX10: call void @needs_dot1_insts( + ; GFX11: call void null( + call void @needs_dot1_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void @needs_dot2_insts( + ; GFX10: call void @needs_dot2_insts( + ; GFX11: call void null( + call void @needs_dot2_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX906: call void null( + ; GFX90A: call void @needs_dot3_insts( + ; GFX10: call void null( + ; GFX11: call void null( + call void @needs_dot3_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX906: call void null( + ; GFX90A: call void @needs_dot4_insts( + ; GFX10: call void null( + ; GFX11: call void null( + call void @needs_dot4_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX906: call void null( + ; GFX90A: call void 
@needs_dot5_insts( + ; GFX10: call void @needs_dot5_insts( + ; GFX11: call void @needs_dot5_insts( + call void @needs_dot5_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX906: call void null( + ; GFX90A: call void @needs_dot6_insts( + ; GFX10: call void @needs_dot6_insts( + ; GFX11: call void null( + call void @needs_dot6_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void @needs_dot7_insts( + ; GFX10: call void @needs_dot7_insts( + ; GFX11: call void @needs_dot7_insts( + call void @needs_dot7_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; GFX7: call void null( + ; GFX8: call void null( + ; GFX9: call void null( + ; GFX10: call void null( + ; GFX11: call void @needs_dot8_insts( + call void @needs_dot8_insts(ptr %out, ptr %in, i64 %a, i64 %b, i64 %c) + ; IR: ret void + ret void +} + +attributes #0 = { "target-features"="+dpp" } +attributes #1 = { "target-features"="+16-bit-insts" } +attributes #2 = { "target-features"="+gfx8-insts" } +attributes #3 = { "target-features"="+gfx9-insts" } +attributes #4 = { "target-features"="+gfx10-insts" } +attributes #5 = { "target-features"="+gfx11-insts" } +attributes #6 = { "target-features"="+dot1-insts" } +attributes #7 = { "target-features"="+dot2-insts" } +attributes #8 = { "target-features"="+dot3-insts" } +attributes #9 = { "target-features"="+dot4-insts" } +attributes #10 = { "target-features"="+dot5-insts" } +attributes #11 = { "target-features"="+dot6-insts" } +attributes #12 = { "target-features"="+dot7-insts" } +attributes #13 = { "target-features"="+dot8-insts" }