diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -47,6 +47,7 @@
 FunctionPass *createSIPostRABundlerPass();
 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *);
 FunctionPass *createAMDGPUUseNativeCallsPass();
+FunctionPass *createAMDGPUClearIncompatibleFunctionsPass(const TargetMachine *);
 FunctionPass *createAMDGPUCodeGenPreparePass();
 FunctionPass *createAMDGPULateCodeGenPreparePass();
 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
@@ -287,6 +288,9 @@
 void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
 extern char &AMDGPUCodeGenPrepareID;
 
+void initializeAMDGPUClearIncompatibleFunctionsPass(PassRegistry &);
+extern char &AMDGPUClearIncompatibleFunctionsID;
+
 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
 extern char &AMDGPULateCodeGenPrepareID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUClearIncompatibleFunctions.cpp b/llvm/lib/Target/AMDGPU/AMDGPUClearIncompatibleFunctions.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUClearIncompatibleFunctions.cpp
@@ -0,0 +1,149 @@
+//===-- AMDGPUClearIncompatibleFunctions.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass replaces the bodies of functions that have attributes incompatible
+/// with the current target with trap/unreachable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
+#include <array>
+#include <string>
+#include <utility>
+
+#define DEBUG_TYPE "amdgpu-clear-incompatible-functions"
+
+using namespace llvm;
+
+namespace llvm {
+// Subtarget feature table defined outside this file; used below to map a
+// feature value back to its name when emitting diagnostics.
+extern const SubtargetFeatureKV
+    AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures - 1];
+} // end namespace llvm
+
+namespace {
+
+using Generation = AMDGPUSubtarget::Generation;
+
+/// Function pass that replaces the body of a function with a trap followed by
+/// unreachable when the function enables a feature from FeatureAndMinGen on a
+/// GPU generation older than that feature's minimum.
+class AMDGPUClearIncompatibleFunctions : public FunctionPass {
+public:
+  static char ID;
+
+  /// \p TM must not be null (asserted below); the null default keeps the
+  /// class default-constructible.
+  AMDGPUClearIncompatibleFunctions(const TargetMachine *TM = nullptr)
+      : FunctionPass(ID), TM(TM) {
+    assert(TM && "No TargetMachine!");
+  }
+
+  StringRef getPassName() const override {
+    return "AMDGPU Clear Incompatible Functions Bodies";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    // If changes are made, no analyses are preserved.
+  }
+
+  bool runOnFunction(Function &F) override;
+
+private:
+  const TargetMachine *TM = nullptr;
+};
+
+// List of features alongside the minimum GPU generation needed to support them.
+constexpr std::array<std::pair<unsigned, Generation>, 6> FeatureAndMinGen = {
+    {{AMDGPU::FeatureGFX11Insts, Generation::GFX11},
+     {AMDGPU::FeatureGFX10Insts, Generation::GFX10},
+     {AMDGPU::FeatureGFX9Insts, Generation::GFX9},
+     {AMDGPU::FeatureGFX8Insts, Generation::VOLCANIC_ISLANDS},
+     {AMDGPU::FeatureDPP, Generation::VOLCANIC_ISLANDS},
+     {AMDGPU::Feature16BitInsts, Generation::VOLCANIC_ISLANDS}}};
+
+/// Returns the name under which \p Feature appears in AMDGPUFeatureKV.
+/// \p Feature must be present in that table.
+StringRef getFeatureName(unsigned Feature) {
+  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV)
+    if (Feature == KV.Value)
+      return KV.Key;
+
+  llvm_unreachable("Unknown Target feature");
+}
+
+} // end anonymous namespace
+
+/// Emits one warning per unsupported feature that \p F enables, then, if there
+/// was at least one, replaces the body of \p F with trap + unreachable.
+/// \returns true if the body of \p F was replaced.
+bool AMDGPUClearIncompatibleFunctions::runOnFunction(Function &F) {
+  if (skipFunction(F) || F.isDeclaration())
+    return false;
+
+  LLVMContext &Ctx = F.getContext();
+  const GCNSubtarget *ST =
+      static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F));
+  Generation GPUGen = ST->getGeneration();
+
+  // Note: this pass checks attributes for GCN, so check we have a GCN GPU.
+  if (GPUGen < Generation::SOUTHERN_ISLANDS)
+    return false;
+
+  // Keep scanning after the first hit so every offending feature is reported.
+  bool Remove = false;
+  for (const auto &[Feature, MinGPUGen] : FeatureAndMinGen) {
+    if (ST->hasFeature(Feature) && GPUGen < MinGPUGen) {
+      Remove = true;
+      std::string Msg =
+          "+" + getFeatureName(Feature).str() +
+          " is not supported on the current target. Deleting function body.";
+      DiagnosticInfoUnsupported DiagInfo(F, Msg, DiagnosticLocation(),
+                                         DS_Warning);
+      Ctx.diagnose(DiagInfo);
+    }
+  }
+
+  if (!Remove)
+    return false;
+
+  // Discard the existing body; the assert checks that no blocks remain.
+  F.dropAllReferences();
+  assert(F.empty());
+
+  // Rebuild the function as a single block: trap, then unreachable.
+  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", &F);
+  IRBuilder<> Builder(Entry);
+  Builder.CreateIntrinsic(Intrinsic::trap, {}, {});
+  Builder.CreateUnreachable();
+  return true;
+}
+
+INITIALIZE_PASS(AMDGPUClearIncompatibleFunctions, DEBUG_TYPE,
+                "AMDGPU Clear Incompatible Functions Bodies", false, false)
+
+char AMDGPUClearIncompatibleFunctions::ID = 0;
+
+// Definition for the reference declared in AMDGPU.h.
+char &llvm::AMDGPUClearIncompatibleFunctionsID =
+    AMDGPUClearIncompatibleFunctions::ID;
+
+/// Creates the pass; \p TM must not be null.
+FunctionPass *
+llvm::createAMDGPUClearIncompatibleFunctionsPass(const TargetMachine *TM) {
+  return new AMDGPUClearIncompatibleFunctions(TM);
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -213,6 +213,12 @@
   cl::init(false),
   cl::Hidden);
 
+static cl::opt<bool> ClearIncompatibleFunctionsBodies(
+    "amdgpu-clear-incompatible-function-bodies", cl::Hidden,
+    cl::desc("Enable deletion of function bodies when they "
+             "use features not supported by the target GPU"),
+    cl::init(true));
+
 static cl::opt<bool> EnableSDWAPeephole(
   "amdgpu-sdwa-peephole",
   cl::desc("Enable SDWA peepholer"),
@@ -376,6 +382,7 @@
   initializeAMDGPULateCodeGenPreparePass(*PR);
   initializeAMDGPUPropagateAttributesEarlyPass(*PR);
   initializeAMDGPUPropagateAttributesLatePass(*PR);
+  initializeAMDGPUClearIncompatibleFunctionsPass(*PR);
   initializeAMDGPUReplaceLDSUseWithPointerPass(*PR);
   initializeAMDGPULowerModuleLDSPass(*PR);
   initializeAMDGPURewriteOutArgumentsPass(*PR);
@@ -1058,6 +1065,11 @@
 bool AMDGPUPassConfig::addPreISel() {
   if (TM->getOptLevel() > CodeGenOpt::None)
     addPass(createFlattenCFGPass());
+
+  if (ClearIncompatibleFunctionsBodies)
+    addPass(
+        createAMDGPUClearIncompatibleFunctionsPass(&getAMDGPUTargetMachine()));
+
   return false;
 }
 
diff --git 
a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -50,6 +50,7 @@ AMDGPUAtomicOptimizer.cpp AMDGPUAttributor.cpp AMDGPUCallLowering.cpp + AMDGPUClearIncompatibleFunctions.cpp AMDGPUCodeGenPrepare.cpp AMDGPUCombinerHelper.cpp AMDGPUCtorDtorLowering.cpp diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -stop-after=legalizer -o - %s | FileCheck %s +; RUN: llc -global-isel -amdgpu-clear-incompatible-function-bodies=0 -mtriple=amdgcn-amd-amdhsa -stop-after=legalizer -o - %s | FileCheck %s ; Make sure legalizer info doesn't assert on dummy targets diff --git a/llvm/test/CodeGen/AMDGPU/clear-incompatible-functions.ll b/llvm/test/CodeGen/AMDGPU/clear-incompatible-functions.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/clear-incompatible-functions.ll @@ -0,0 +1,628 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>%t | FileCheck -check-prefix=GFX7 %s +; RUN: FileCheck --check-prefixes=GFX8-WARN,GFX9-WARN,GFX10-WARN,GFX11-WARN %s < %t + +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s 2>%t | FileCheck -check-prefix=GFX8 %s +; RUN: FileCheck --check-prefixes=GFX9-WARN,GFX10-WARN,GFX11-WARN %s < %t + +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s 2>%t | FileCheck -check-prefix=GFX9 %s +; RUN: FileCheck --check-prefixes=GFX10-WARN,GFX11-WARN %s < %t + +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s 2>%t | FileCheck -check-prefix=GFX10 %s +; RUN: FileCheck 
--check-prefixes=GFX11-WARN %s < %t + +; Use --fatal-warnings to confirm no diagnostics are emitted for GFX11. +; RUN: llc --fatal-warnings -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s + +; GFX8-WARN: needs_dpp {{.*}} +dpp is not supported on the current target. Deleting function body. +; GFX8-WARN: needs_16bit_insts {{.*}} +16-bit-insts is not supported on the current target. Deleting function body. +; GFX8-WARN: needs_gfx8_insts {{.*}} +gfx8-insts is not supported on the current target. Deleting function body. +; GFX9-WARN: needs_gfx9_insts {{.*}} +gfx9-insts is not supported on the current target. Deleting function body. +; GFX10-WARN: needs_gfx10_insts {{.*}} +gfx10-insts is not supported on the current target. Deleting function body. +; GFX11-WARN: needs_gfx11_insts {{.*}} +gfx11-insts is not supported on the current target. Deleting function body. + +define void @needs_dpp(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) #0 { +; GFX7-LABEL: needs_dpp: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX8-LABEL: needs_dpp: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX8-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX8-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX8-NEXT: ; %bb.1: ; %else +; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v6 +; GFX8-NEXT: v_addc_u32_e32 v9, vcc, v5, v7, vcc +; GFX8-NEXT: ; implicit-def: $vgpr2 +; GFX8-NEXT: ; %bb.2: ; %Flow +; GFX8-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX8-NEXT: s_cbranch_execz .LBB0_4 +; GFX8-NEXT: ; %bb.3: ; %if +; GFX8-NEXT: flat_load_dwordx2 v[8:9], v[2:3] +; GFX8-NEXT: .LBB0_4: ; %endif +; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[8:9] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX9-LABEL: needs_dpp: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX9-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX9-NEXT: ; %bb.1: ; %else +; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v4, v6 +; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, v5, v7, vcc +; GFX9-NEXT: ; implicit-def: $vgpr2 +; GFX9-NEXT: ; %bb.2: ; %Flow +; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX9-NEXT: s_cbranch_execz .LBB0_4 +; GFX9-NEXT: ; %bb.3: ; %if +; GFX9-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX9-NEXT: .LBB0_4: ; %endif +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[8:9], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: needs_dpp: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5] +; GFX10-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX10-NEXT: s_xor_b32 s4, exec_lo, s4 +; GFX10-NEXT: ; %bb.1: ; %else +; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v4, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v7, vcc_lo +; GFX10-NEXT: ; implicit-def: $vgpr2 +; GFX10-NEXT: ; %bb.2: ; %Flow +; GFX10-NEXT: s_andn2_saveexec_b32 s4, s4 +; GFX10-NEXT: s_cbranch_execz .LBB0_4 +; GFX10-NEXT: ; %bb.3: ; %if +; GFX10-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX10-NEXT: .LBB0_4: ; %endif +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx2 v[0:1], v[8:9], off +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: needs_dpp: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s0, exec_lo +; GFX11-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX11-NEXT: v_cmpx_ne_u64_e32 0, v[4:5] +; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX11-NEXT: ; %bb.1: ; %else +; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v4, v6 +; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v7, vcc_lo +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; %bb.2: ; %Flow +; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 +; GFX11-NEXT: s_cbranch_execz .LBB0_4 +; GFX11-NEXT: ; %bb.3: ; %if +; GFX11-NEXT: global_load_b64 v[8:9], v[2:3], off +; GFX11-NEXT: .LBB0_4: ; %endif +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b64 v[0:1], v[8:9], off +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +entry: + %0 = icmp eq i64 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i64, i64 addrspace(1)* %in + br label %endif + +else: + %2 = add i64 %a, %b + br label %endif + +endif: + %3 = phi i64 [%1, %if], [%2, %else] + store i64 %3, i64 addrspace(1)* %out + ret void +} + +define void @needs_16bit_insts(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) #1 { +; GFX7-LABEL: needs_16bit_insts: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX8-LABEL: needs_16bit_insts: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX8-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX8-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX8-NEXT: ; %bb.1: ; %else +; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v6 +; GFX8-NEXT: v_addc_u32_e32 v9, vcc, v5, v7, vcc +; GFX8-NEXT: ; implicit-def: $vgpr2 +; GFX8-NEXT: ; %bb.2: ; %Flow +; GFX8-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX8-NEXT: s_cbranch_execz .LBB1_4 +; GFX8-NEXT: ; %bb.3: ; %if +; GFX8-NEXT: 
flat_load_dwordx2 v[8:9], v[2:3] +; GFX8-NEXT: .LBB1_4: ; %endif +; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[8:9] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: needs_16bit_insts: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX9-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX9-NEXT: ; %bb.1: ; %else +; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v4, v6 +; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, v5, v7, vcc +; GFX9-NEXT: ; implicit-def: $vgpr2 +; GFX9-NEXT: ; %bb.2: ; %Flow +; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX9-NEXT: s_cbranch_execz .LBB1_4 +; GFX9-NEXT: ; %bb.3: ; %if +; GFX9-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX9-NEXT: .LBB1_4: ; %endif +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[8:9], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: needs_16bit_insts: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5] +; GFX10-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX10-NEXT: s_xor_b32 s4, exec_lo, s4 +; GFX10-NEXT: ; %bb.1: ; %else +; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v4, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v7, vcc_lo +; GFX10-NEXT: ; implicit-def: $vgpr2 +; GFX10-NEXT: ; %bb.2: ; %Flow +; GFX10-NEXT: s_andn2_saveexec_b32 s4, s4 +; GFX10-NEXT: s_cbranch_execz .LBB1_4 +; GFX10-NEXT: ; %bb.3: ; %if +; GFX10-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX10-NEXT: .LBB1_4: ; %endif +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; 
GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx2 v[0:1], v[8:9], off +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: needs_16bit_insts: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s0, exec_lo +; GFX11-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX11-NEXT: v_cmpx_ne_u64_e32 0, v[4:5] +; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX11-NEXT: ; %bb.1: ; %else +; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v4, v6 +; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v7, vcc_lo +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; %bb.2: ; %Flow +; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 +; GFX11-NEXT: s_cbranch_execz .LBB1_4 +; GFX11-NEXT: ; %bb.3: ; %if +; GFX11-NEXT: global_load_b64 v[8:9], v[2:3], off +; GFX11-NEXT: .LBB1_4: ; %endif +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b64 v[0:1], v[8:9], off +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +entry: + %0 = icmp eq i64 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i64, i64 addrspace(1)* %in + br label %endif + +else: + %2 = add i64 %a, %b + br label %endif + +endif: + %3 = phi i64 [%1, %if], [%2, %else] + store i64 %3, i64 addrspace(1)* %out + ret void +} + +define void @needs_gfx8_insts(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) #2 { +; GFX7-LABEL: needs_gfx8_insts: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX8-LABEL: needs_gfx8_insts: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX8-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX8-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX8-NEXT: ; %bb.1: ; %else +; GFX8-NEXT: v_add_u32_e32 v8, 
vcc, v4, v6 +; GFX8-NEXT: v_addc_u32_e32 v9, vcc, v5, v7, vcc +; GFX8-NEXT: ; implicit-def: $vgpr2 +; GFX8-NEXT: ; %bb.2: ; %Flow +; GFX8-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX8-NEXT: s_cbranch_execz .LBB2_4 +; GFX8-NEXT: ; %bb.3: ; %if +; GFX8-NEXT: flat_load_dwordx2 v[8:9], v[2:3] +; GFX8-NEXT: .LBB2_4: ; %endif +; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[8:9] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: needs_gfx8_insts: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX9-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX9-NEXT: ; %bb.1: ; %else +; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v4, v6 +; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, v5, v7, vcc +; GFX9-NEXT: ; implicit-def: $vgpr2 +; GFX9-NEXT: ; %bb.2: ; %Flow +; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX9-NEXT: s_cbranch_execz .LBB2_4 +; GFX9-NEXT: ; %bb.3: ; %if +; GFX9-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX9-NEXT: .LBB2_4: ; %endif +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[8:9], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: needs_gfx8_insts: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5] +; GFX10-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX10-NEXT: s_xor_b32 s4, exec_lo, s4 +; GFX10-NEXT: ; %bb.1: ; %else +; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v4, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v7, vcc_lo +; GFX10-NEXT: ; implicit-def: $vgpr2 +; GFX10-NEXT: ; %bb.2: ; %Flow +; GFX10-NEXT: 
s_andn2_saveexec_b32 s4, s4 +; GFX10-NEXT: s_cbranch_execz .LBB2_4 +; GFX10-NEXT: ; %bb.3: ; %if +; GFX10-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX10-NEXT: .LBB2_4: ; %endif +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx2 v[0:1], v[8:9], off +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: needs_gfx8_insts: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s0, exec_lo +; GFX11-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX11-NEXT: v_cmpx_ne_u64_e32 0, v[4:5] +; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX11-NEXT: ; %bb.1: ; %else +; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v4, v6 +; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v7, vcc_lo +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; %bb.2: ; %Flow +; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 +; GFX11-NEXT: s_cbranch_execz .LBB2_4 +; GFX11-NEXT: ; %bb.3: ; %if +; GFX11-NEXT: global_load_b64 v[8:9], v[2:3], off +; GFX11-NEXT: .LBB2_4: ; %endif +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b64 v[0:1], v[8:9], off +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +entry: + %0 = icmp eq i64 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i64, i64 addrspace(1)* %in + br label %endif + +else: + %2 = add i64 %a, %b + br label %endif + +endif: + %3 = phi i64 [%1, %if], [%2, %else] + store i64 %3, i64 addrspace(1)* %out + ret void +} + +define void @needs_gfx9_insts(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) #3 { +; GFX7-LABEL: needs_gfx9_insts: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX8-LABEL: needs_gfx9_insts: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: needs_gfx9_insts: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX9-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX9-NEXT: ; %bb.1: ; %else +; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v4, v6 +; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, v5, v7, vcc +; GFX9-NEXT: ; implicit-def: $vgpr2 +; GFX9-NEXT: ; %bb.2: ; %Flow +; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX9-NEXT: s_cbranch_execz .LBB3_4 +; GFX9-NEXT: ; %bb.3: ; %if +; GFX9-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX9-NEXT: .LBB3_4: ; %endif +; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[8:9], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: needs_gfx9_insts: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5] +; GFX10-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo +; GFX10-NEXT: s_xor_b32 s4, exec_lo, s4 +; GFX10-NEXT: ; %bb.1: ; %else +; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v4, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v7, vcc_lo +; GFX10-NEXT: ; implicit-def: $vgpr2 +; GFX10-NEXT: ; %bb.2: ; %Flow +; GFX10-NEXT: s_andn2_saveexec_b32 s4, s4 +; GFX10-NEXT: s_cbranch_execz .LBB3_4 +; GFX10-NEXT: ; %bb.3: ; %if +; GFX10-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX10-NEXT: .LBB3_4: ; %endif +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx2 v[0:1], v[8:9], off +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: needs_gfx9_insts: +; GFX11: 
; %bb.0: ; %entry +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s0, exec_lo +; GFX11-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX11-NEXT: v_cmpx_ne_u64_e32 0, v[4:5] +; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX11-NEXT: ; %bb.1: ; %else +; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v4, v6 +; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v7, vcc_lo +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; %bb.2: ; %Flow +; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 +; GFX11-NEXT: s_cbranch_execz .LBB3_4 +; GFX11-NEXT: ; %bb.3: ; %if +; GFX11-NEXT: global_load_b64 v[8:9], v[2:3], off +; GFX11-NEXT: .LBB3_4: ; %endif +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b64 v[0:1], v[8:9], off +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +entry: + %0 = icmp eq i64 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i64, i64 addrspace(1)* %in + br label %endif + +else: + %2 = add i64 %a, %b + br label %endif + +endif: + %3 = phi i64 [%1, %if], [%2, %else] + store i64 %3, i64 addrspace(1)* %out + ret void +} + +define void @needs_gfx10_insts(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) #4 { +; GFX7-LABEL: needs_gfx10_insts: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX8-LABEL: needs_gfx10_insts: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: needs_gfx10_insts: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: needs_gfx10_insts: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5] +; GFX10-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX10-NEXT: s_and_saveexec_b32 
s4, vcc_lo +; GFX10-NEXT: s_xor_b32 s4, exec_lo, s4 +; GFX10-NEXT: ; %bb.1: ; %else +; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v4, v6 +; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v7, vcc_lo +; GFX10-NEXT: ; implicit-def: $vgpr2 +; GFX10-NEXT: ; %bb.2: ; %Flow +; GFX10-NEXT: s_andn2_saveexec_b32 s4, s4 +; GFX10-NEXT: s_cbranch_execz .LBB4_4 +; GFX10-NEXT: ; %bb.3: ; %if +; GFX10-NEXT: global_load_dwordx2 v[8:9], v[2:3], off +; GFX10-NEXT: .LBB4_4: ; %endif +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dwordx2 v[0:1], v[8:9], off +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: needs_gfx10_insts: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s0, exec_lo +; GFX11-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX11-NEXT: v_cmpx_ne_u64_e32 0, v[4:5] +; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX11-NEXT: ; %bb.1: ; %else +; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v4, v6 +; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v7, vcc_lo +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; %bb.2: ; %Flow +; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 +; GFX11-NEXT: s_cbranch_execz .LBB4_4 +; GFX11-NEXT: ; %bb.3: ; %if +; GFX11-NEXT: global_load_b64 v[8:9], v[2:3], off +; GFX11-NEXT: .LBB4_4: ; %endif +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b64 v[0:1], v[8:9], off +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +entry: + %0 = icmp eq i64 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i64, i64 addrspace(1)* %in + br label %endif + +else: + %2 = add i64 %a, %b + br label %endif + +endif: + %3 = phi i64 [%1, %if], [%2, %else] + store i64 %3, i64 addrspace(1)* %out + ret void +} + +define void @needs_gfx11_insts(i64 addrspace(1)* %out, 
i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) #5 { +; GFX7-LABEL: needs_gfx11_insts: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_endpgm +; +; GFX8-LABEL: needs_gfx11_insts: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: needs_gfx11_insts: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: needs_gfx11_insts: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: needs_gfx11_insts: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s0, exec_lo +; GFX11-NEXT: ; implicit-def: $vgpr8_vgpr9 +; GFX11-NEXT: v_cmpx_ne_u64_e32 0, v[4:5] +; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 +; GFX11-NEXT: ; %bb.1: ; %else +; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v4, v6 +; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v5, v7, vcc_lo +; GFX11-NEXT: ; implicit-def: $vgpr2 +; GFX11-NEXT: ; %bb.2: ; %Flow +; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 +; GFX11-NEXT: s_cbranch_execz .LBB5_4 +; GFX11-NEXT: ; %bb.3: ; %if +; GFX11-NEXT: global_load_b64 v[8:9], v[2:3], off +; GFX11-NEXT: .LBB5_4: ; %endif +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b64 v[0:1], v[8:9], off +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +entry: + %0 = icmp eq i64 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i64, i64 addrspace(1)* %in + br label %endif + +else: + %2 = add i64 %a, %b + br label %endif + +endif: + %3 = phi i64 [%1, %if], [%2, %else] + store i64 %3, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { "target-features"="+dpp" } +attributes #1 = { "target-features"="+16-bit-insts" } +attributes 
#2 = { "target-features"="+gfx8-insts" } +attributes #3 = { "target-features"="+gfx9-insts" } +attributes #4 = { "target-features"="+gfx10-insts" } +attributes #5 = { "target-features"="+gfx11-insts" } diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -59,6 +59,7 @@ ; GCN-O0-NEXT: Lower SwitchInst's to branches ; GCN-O0-NEXT: Lower invoke and unwind, for unwindless code generators ; GCN-O0-NEXT: Remove unreachable blocks from the CFG +; GCN-O0-NEXT: AMDGPU Clear Incompatible Functions Bodies ; GCN-O0-NEXT: Post-Dominator Tree Construction ; GCN-O0-NEXT: Dominator Tree Construction ; GCN-O0-NEXT: Natural Loop Information @@ -238,6 +239,7 @@ ; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Flatten the CFG +; GCN-O1-NEXT: AMDGPU Clear Incompatible Functions Bodies ; GCN-O1-NEXT: Dominator Tree Construction ; GCN-O1-NEXT: Post-Dominator Tree Construction ; GCN-O1-NEXT: Natural Loop Information @@ -525,6 +527,7 @@ ; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O1-OPTS-NEXT: Function Alias Analysis Results ; GCN-O1-OPTS-NEXT: Flatten the CFG +; GCN-O1-OPTS-NEXT: AMDGPU Clear Incompatible Functions Bodies ; GCN-O1-OPTS-NEXT: Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Natural Loop Information @@ -820,6 +823,7 @@ ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Flatten the CFG +; GCN-O2-NEXT: AMDGPU Clear Incompatible Functions Bodies ; GCN-O2-NEXT: Dominator Tree Construction ; GCN-O2-NEXT: Post-Dominator Tree Construction ; GCN-O2-NEXT: Natural Loop Information @@ -1130,6 +1134,7 @@ ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Flatten the CFG +; 
GCN-O3-NEXT: AMDGPU Clear Incompatible Functions Bodies ; GCN-O3-NEXT: Dominator Tree Construction ; GCN-O3-NEXT: Post-Dominator Tree Construction ; GCN-O3-NEXT: Natural Loop Information