diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -901,7 +901,7 @@ if (EnableSROA) addPass(createSROAPass()); - if (EnableScalarIRPasses) + if (EnableScalarIRPasses && TM.getOptLevel() > CodeGenOpt::Less) addStraightLineScalarOptimizationPasses(); if (EnableAMDGPUAliasAnalysis) { @@ -933,7 +933,7 @@ // %1 = shl %a, 2 // // but EarlyCSE can do neither of them. - if (getOptLevel() != CodeGenOpt::None && EnableScalarIRPasses) + if (EnableScalarIRPasses && getOptLevel() > CodeGenOpt::Less) addEarlyCSEOrGVNPass(); } @@ -949,7 +949,7 @@ TargetPassConfig::addCodeGenPrepare(); - if (EnableLoadStoreVectorizer) + if (EnableLoadStoreVectorizer && TM->getOptLevel() > CodeGenOpt::Less) addPass(createLoadStoreVectorizerPass()); // LowerSwitch pass may introduce unreachable blocks that can @@ -1072,7 +1072,7 @@ if (EnableDPPCombine) addPass(&GCNDPPCombineID); addPass(&SILoadStoreOptimizerID); - if (EnableSDWAPeephole) { + if (EnableSDWAPeephole && getOptLevel() > CodeGenOpt::Less) { addPass(&SIPeepholeSDWAID); addPass(&EarlyMachineLICMID); addPass(&MachineCSEID); diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -0,0 +1,987 @@ +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -disable-verify -debug-pass=Structure %s 2>&1 | FileCheck -check-prefix=GCN-O0 %s +; RUN: llc -O1 -mtriple=amdgcn--amdhsa -disable-verify -debug-pass=Structure %s 2>&1 | FileCheck -check-prefix=GCN-O1 %s +; RUN: llc -O2 -mtriple=amdgcn--amdhsa -disable-verify -debug-pass=Structure %s 2>&1 | FileCheck -check-prefix=GCN-O2 %s +; RUN: llc -O3 -mtriple=amdgcn--amdhsa -disable-verify -debug-pass=Structure %s 2>&1 | FileCheck -check-prefix=GCN-O3 %s + +; REQUIRES: asserts + +; GCN-O0: Pass Arguments: -targetlibinfo -targetpassconfig -machinemoduleinfo -tti -assumption-cache-tracker -profile-summary-info -collector-metadata -amdgpu-argument-reg-usage-info -reg-usage-info -machine-branch-prob -pre-isel-intrinsic-lowering -amdgpu-printf-runtime-binding -amdgpu-fix-function-bitcasts -amdgpu-propagate-attributes-early -atomic-expand -amdgpu-lower-intrinsics -amdgpu-always-inline -basiccg -always-inline -barrier -amdgpu-lower-enqueued-block -amdgpu-lower-module-lds -domtree -postdomtree -loops -divergence -amdgpu-codegenprepare -gc-lowering -shadow-stack-gc-lowering -lower-constant-intrinsics -unreachableblockelim -scalarize-masked-mem-intrin -expand-reductions -basiccg -amdgpu-annotate-kernel-features -amdgpu-lower-kernel-arguments -amdgpu-perf-hint -rewrite-symbols -lazy-value-info -lowerswitch -lowerinvoke -unreachableblockelim -domtree -basic-aa -aa -flattencfg -domtree -postdomtree -loops -divergence -amdgpu-late-codegenprepare -amdgpu-unify-divergent-exit-nodes -lazy-value-info -lowerswitch -domtree -loops -fix-irreducible -unify-loop-exits -postdomtree -domfrontier -regions -structurizecfg -basic-aa -aa -loops -sink -postdomtree -divergence -phi-values -aa -memdep -amdgpu-annotate-uniform -si-annotate-control-flow -loops -lcssa-verification -lcssa -basiccg -DummyCGSCCPass -safe-stack -stack-protector -domtree -postdomtree -loops -divergence -amdgpu-isel -machinedomtree -si-fix-sgpr-copies -machinepostdomtree -si-i1-copies -finalize-isel -localstackalloc -reg-usage-propagation -phi-node-elimination -si-lower-control-flow -twoaddressinstruction -basic-aa -aa -machinedomtree -slotindexes -liveintervals -machinepostdomtree -si-wqm -virtregmap -liveregmatrix -si-pre-allocate-wwm-regs -regallocfast -si-fix-vgpr-copies -si-lower-sgpr-spills -fixup-statepoint-caller-saved -lazy-machine-block-freq -machine-opt-remark-emitter -prologepilog -postrapseudos -si-post-ra-bundler -fentry-insert -xray-instrumentation -si-memory-legalizer -machinepostdomtree -si-insert-waitcnts -si-shrink-instructions -si-mode-register -machinedomtree -si-late-branch-lowering -post-RA-hazard-rec -branch-relaxation -RegUsageInfoCollector -livedebugvalues -lazy-machine-block-freq -machine-opt-remark-emitter +; GCN-O0-NEXT: Target Library Information +; GCN-O0-NEXT: Target Pass Configuration +; GCN-O0-NEXT: Machine Module Information +; GCN-O0-NEXT: Target Transform Information +; GCN-O0-NEXT: Assumption Cache Tracker +; GCN-O0-NEXT: Profile summary info +; GCN-O0-NEXT: Create Garbage Collector Module Metadata +; GCN-O0-NEXT: Argument Register Usage Information Storage +; GCN-O0-NEXT: Register Usage Information Storage +; GCN-O0-NEXT: Machine Branch Probability Analysis +; GCN-O0-NEXT: ModulePass Manager +; GCN-O0-NEXT: Pre-ISel Intrinsic Lowering +; GCN-O0-NEXT: AMDGPU Printf lowering +; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: Dominator Tree Construction +; GCN-O0-NEXT: Fix function bitcasts for AMDGPU +; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: Early propagate attributes from kernels to functions +; GCN-O0-NEXT: Expand Atomic instructions +; GCN-O0-NEXT: AMDGPU Lower Intrinsics +; GCN-O0-NEXT: AMDGPU Inline All Functions +; GCN-O0-NEXT: CallGraph Construction +; GCN-O0-NEXT: Call Graph SCC Pass Manager +; GCN-O0-NEXT: Inliner for always_inline functions +; GCN-O0-NEXT: A No-Op Barrier Pass +; GCN-O0-NEXT: Lower OpenCL enqueued blocks +; GCN-O0-NEXT: Lower uses of LDS variables from non-kernel functions +; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: Dominator Tree Construction +; GCN-O0-NEXT: Post-Dominator Tree Construction +; GCN-O0-NEXT: Natural Loop Information +; GCN-O0-NEXT: Legacy Divergence Analysis +; GCN-O0-NEXT: AMDGPU IR optimizations +; GCN-O0-NEXT: Lower Garbage Collection Instructions +; GCN-O0-NEXT: Shadow Stack GC Lowering +; GCN-O0-NEXT: Lower constant intrinsics +; GCN-O0-NEXT: Remove unreachable blocks from the CFG +; GCN-O0-NEXT: Scalarize Masked Memory Intrinsics +; GCN-O0-NEXT: Expand reduction intrinsics +; GCN-O0-NEXT: CallGraph Construction +; GCN-O0-NEXT: Call Graph SCC Pass Manager +; GCN-O0-NEXT: AMDGPU Annotate Kernel Features +; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: AMDGPU Lower Kernel Arguments +; GCN-O0-NEXT: Analysis if a function is memory bound +; GCN-O0-NEXT: Rewrite Symbols +; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: Lazy Value Information Analysis +; GCN-O0-NEXT: Lower SwitchInst's to branches +; GCN-O0-NEXT: Lower invoke and unwind, for unwindless code generators +; GCN-O0-NEXT: Remove unreachable blocks from the CFG +; GCN-O0-NEXT: Dominator Tree Construction +; GCN-O0-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O0-NEXT: Function Alias Analysis Results +; GCN-O0-NEXT: Flatten the CFG +; GCN-O0-NEXT: Dominator Tree Construction +; GCN-O0-NEXT: Post-Dominator Tree Construction +; GCN-O0-NEXT: Natural Loop Information +; GCN-O0-NEXT: Legacy Divergence Analysis +; GCN-O0-NEXT: AMDGPU IR late optimizations +; GCN-O0-NEXT: Unify divergent function exit nodes +; GCN-O0-NEXT: Lazy Value Information Analysis +; GCN-O0-NEXT: Lower SwitchInst's to branches +; GCN-O0-NEXT: Dominator Tree Construction +; GCN-O0-NEXT: Natural Loop Information +; GCN-O0-NEXT: Convert irreducible control-flow into natural loops +; GCN-O0-NEXT: Fixup each natural loop to have a single exit block +; GCN-O0-NEXT: Post-Dominator Tree Construction +; GCN-O0-NEXT: Dominance Frontier Construction +; GCN-O0-NEXT: Detect single entry single exit regions +; GCN-O0-NEXT: Region Pass Manager +; GCN-O0-NEXT: Structurize control flow +; GCN-O0-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O0-NEXT: Function Alias Analysis Results +; GCN-O0-NEXT: Natural Loop Information +; GCN-O0-NEXT: Code sinking +; GCN-O0-NEXT: Post-Dominator Tree Construction +; GCN-O0-NEXT: Legacy Divergence Analysis +; GCN-O0-NEXT: Phi Values Analysis +; GCN-O0-NEXT: Function Alias Analysis Results +; GCN-O0-NEXT: Memory Dependence Analysis +; GCN-O0-NEXT: AMDGPU Annotate Uniform Values +; GCN-O0-NEXT: SI annotate control flow +; GCN-O0-NEXT: Natural Loop Information +; GCN-O0-NEXT: LCSSA Verifier +; GCN-O0-NEXT: Loop-Closed SSA Form Pass +; GCN-O0-NEXT: CallGraph Construction +; GCN-O0-NEXT: Call Graph SCC Pass Manager +; GCN-O0-NEXT: DummyCGSCCPass +; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: Safe Stack instrumentation pass +; GCN-O0-NEXT: Insert stack protectors +; GCN-O0-NEXT: Dominator Tree Construction +; GCN-O0-NEXT: Post-Dominator Tree Construction +; GCN-O0-NEXT: Natural Loop Information +; GCN-O0-NEXT: Legacy Divergence Analysis +; GCN-O0-NEXT: AMDGPU DAG->DAG Pattern Instruction Selection +; GCN-O0-NEXT: MachineDominator Tree Construction +; GCN-O0-NEXT: SI Fix SGPR copies +; GCN-O0-NEXT: MachinePostDominator Tree Construction +; GCN-O0-NEXT: SI Lower i1 Copies +; GCN-O0-NEXT: Finalize ISel and expand pseudo-instructions +; GCN-O0-NEXT: Local Stack Slot Allocation +; GCN-O0-NEXT: Register Usage Information Propagation +; GCN-O0-NEXT: Eliminate PHI nodes for register allocation +; GCN-O0-NEXT: SI Lower control flow pseudo instructions +; GCN-O0-NEXT: Two-Address instruction pass +; GCN-O0-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O0-NEXT: Function Alias Analysis Results +; GCN-O0-NEXT: MachineDominator Tree Construction +; GCN-O0-NEXT: Slot index numbering +; GCN-O0-NEXT: Live Interval Analysis +; GCN-O0-NEXT: MachinePostDominator Tree Construction +; GCN-O0-NEXT: SI Whole Quad Mode +; GCN-O0-NEXT: Virtual Register Map +; GCN-O0-NEXT: Live Register Matrix +; GCN-O0-NEXT: SI Pre-allocate WWM Registers +; GCN-O0-NEXT: Fast Register Allocator +; GCN-O0-NEXT: SI Fix VGPR copies +; GCN-O0-NEXT: SI lower SGPR spill instructions +; GCN-O0-NEXT: Fixup Statepoint Caller Saved +; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O0-NEXT: Machine Optimization Remark Emitter +; GCN-O0-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; GCN-O0-NEXT: Post-RA pseudo instruction expansion pass +; GCN-O0-NEXT: SI post-RA bundler +; GCN-O0-NEXT: Insert fentry calls +; GCN-O0-NEXT: Insert XRay ops +; GCN-O0-NEXT: SI Memory Legalizer +; GCN-O0-NEXT: MachinePostDominator Tree Construction +; GCN-O0-NEXT: SI insert wait instructions +; GCN-O0-NEXT: SI Shrink Instructions +; GCN-O0-NEXT: Insert required mode register values +; GCN-O0-NEXT: MachineDominator Tree Construction +; GCN-O0-NEXT: SI Final Branch Preparation +; GCN-O0-NEXT: Post RA hazard recognizer +; GCN-O0-NEXT: Branch relaxation pass +; GCN-O0-NEXT: Register Usage Information Collector Pass +; GCN-O0-NEXT: Live DEBUG_VALUE analysis +; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O0-NEXT: Machine Optimization Remark Emitter +; GCN-O0-NEXT: AMDGPU Assembly Printer +; GCN-O0-NEXT: Free MachineFunction +; GCN-O0-NEXT: Pass Arguments: -domtree +; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: Dominator Tree Construction + +; GCN-O1: Pass Arguments: -targetlibinfo -targetpassconfig -machinemoduleinfo -tti -assumption-cache-tracker -profile-summary-info -amdgpu-aa -external-aa -tbaa -scoped-noalias-aa -collector-metadata -amdgpu-argument-reg-usage-info -machine-branch-prob -reg-usage-info -pre-isel-intrinsic-lowering -amdgpu-printf-runtime-binding -amdgpu-fix-function-bitcasts -amdgpu-propagate-attributes-early -atomic-expand -amdgpu-lower-intrinsics -amdgpu-always-inline -basiccg -always-inline -barrier -amdgpu-lower-enqueued-block -amdgpu-lower-module-lds -infer-address-spaces -amdgpu-promote-alloca -domtree -sroa -postdomtree -loops -divergence -amdgpu-codegenprepare -basic-aa -loop-simplify -scalar-evolution -canon-freeze -iv-users -loop-reduce -basic-aa -aa -mergeicmps -loops -lazy-branch-prob -lazy-block-freq -expandmemcmp -gc-lowering -shadow-stack-gc-lowering -lower-constant-intrinsics -unreachableblockelim -loops -postdomtree -branch-prob -block-freq -consthoist -replace-with-veclib -partially-inline-libcalls -scalarize-masked-mem-intrin -expand-reductions -basiccg -amdgpu-annotate-kernel-features -amdgpu-lower-kernel-arguments -amdgpu-perf-hint -domtree -loops -codegenprepare -rewrite-symbols -lazy-value-info -lowerswitch -lowerinvoke -unreachableblockelim -domtree -basic-aa -aa -flattencfg -domtree -postdomtree -loops -divergence -amdgpu-late-codegenprepare -amdgpu-unify-divergent-exit-nodes -lazy-value-info -lowerswitch -domtree -loops -fix-irreducible -unify-loop-exits -postdomtree -domfrontier -regions -structurizecfg -basic-aa -aa -loops -sink -postdomtree -divergence -phi-values -aa -memdep -amdgpu-annotate-uniform -si-annotate-control-flow -loops -lcssa-verification -lcssa -basiccg -DummyCGSCCPass -safe-stack -stack-protector -domtree -postdomtree -loops -divergence -basic-aa -aa -branch-prob -lazy-branch-prob -lazy-block-freq -amdgpu-isel -machinedomtree -si-fix-sgpr-copies -machinepostdomtree -si-i1-copies -finalize-isel -lazy-machine-block-freq -early-tailduplication -opt-phis -slotindexes -stack-coloring -localstackalloc -dead-mi-elimination -machinedomtree -machine-loops -machine-block-freq -early-machinelicm -machinedomtree -machine-block-freq -machine-cse -machinepostdomtree -machine-sink -peephole-opt -dead-mi-elimination -si-fold-operands -gcn-dpp-combine -si-load-store-opt -dead-mi-elimination -si-shrink-instructions -reg-usage-propagation -detect-dead-lanes -dead-mi-elimination -processimpdefs -unreachable-mbb-elimination -livevars -phi-node-elimination -si-lower-control-flow -twoaddressinstruction -machinedomtree -slotindexes -liveintervals -machine-loops -simple-register-coalescing -rename-independent-subregs -machine-scheduler -machinepostdomtree -si-wqm -virtregmap -liveregmatrix -si-pre-allocate-wwm-regs -si-optimize-exec-masking-pre-ra -si-form-memory-clauses -machine-loops -machine-block-freq -livedebugvars -livestacks -virtregmap -liveregmatrix -edge-bundles -spill-code-placement -lazy-machine-block-freq -machine-opt-remark-emitter -greedy -amdgpu-nsa-reassign -virtregrewriter -stack-slot-coloring -machine-cp -machinelicm -si-fix-vgpr-copies -si-optimize-exec-masking -si-lower-sgpr-spills -fixup-statepoint-caller-saved -postra-machine-sink -machinedomtree -machine-loops -machine-block-freq -machinepostdomtree -lazy-machine-block-freq -machine-opt-remark-emitter -shrink-wrap -prologepilog -branch-folder -lazy-machine-block-freq -tailduplication -machine-cp -postrapseudos -si-post-ra-bundler -machinedomtree -machine-loops -post-RA-sched -machine-block-freq -machinepostdomtree -block-placement -fentry-insert -xray-instrumentation -si-memory-legalizer -machinepostdomtree -si-insert-waitcnts -si-shrink-instructions -si-mode-register -si-insert-hard-clauses -machinedomtree -si-late-branch-lowering -si-pre-emit-peephole -post-RA-hazard-rec -branch-relaxation -RegUsageInfoCollector -livedebugvalues -lazy-machine-block-freq -machine-opt-remark-emitter +; GCN-O1-NEXT: Target Library Information +; GCN-O1-NEXT: Target Pass Configuration +; GCN-O1-NEXT: Machine Module Information +; GCN-O1-NEXT: Target Transform Information +; GCN-O1-NEXT: Assumption Cache Tracker +; GCN-O1-NEXT: Profile summary info +; GCN-O1-NEXT: AMDGPU Address space based Alias Analysis +; GCN-O1-NEXT: External Alias Analysis +; GCN-O1-NEXT: Type-Based Alias Analysis +; GCN-O1-NEXT: Scoped NoAlias Alias Analysis +; GCN-O1-NEXT: Create Garbage Collector Module Metadata +; GCN-O1-NEXT: Argument Register Usage Information Storage +; GCN-O1-NEXT: Machine Branch Probability Analysis +; GCN-O1-NEXT: Register Usage Information Storage +; GCN-O1-NEXT: ModulePass Manager +; GCN-O1-NEXT: Pre-ISel Intrinsic Lowering +; GCN-O1-NEXT: AMDGPU Printf lowering +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Fix function bitcasts for AMDGPU +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Early propagate attributes from kernels to functions +; GCN-O1-NEXT: Expand Atomic instructions +; GCN-O1-NEXT: AMDGPU Lower Intrinsics +; GCN-O1-NEXT: AMDGPU Inline All Functions +; GCN-O1-NEXT: CallGraph Construction +; GCN-O1-NEXT: Call Graph SCC Pass Manager +; GCN-O1-NEXT: Inliner for always_inline functions +; GCN-O1-NEXT: A No-Op Barrier Pass +; GCN-O1-NEXT: Lower OpenCL enqueued blocks +; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Infer address spaces +; GCN-O1-NEXT: AMDGPU Promote Alloca +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: SROA +; GCN-O1-NEXT: Post-Dominator Tree Construction +; GCN-O1-NEXT: Natural Loop Information +; GCN-O1-NEXT: Legacy Divergence Analysis +; GCN-O1-NEXT: AMDGPU IR optimizations +; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O1-NEXT: Canonicalize natural loops +; GCN-O1-NEXT: Scalar Evolution Analysis +; GCN-O1-NEXT: Loop Pass Manager +; GCN-O1-NEXT: Canonicalize Freeze Instructions in Loops +; GCN-O1-NEXT: Induction Variable Users +; GCN-O1-NEXT: Loop Strength Reduction +; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O1-NEXT: Function Alias Analysis Results +; GCN-O1-NEXT: Merge contiguous icmps into a memcmp +; GCN-O1-NEXT: Natural Loop Information +; GCN-O1-NEXT: Lazy Branch Probability Analysis +; GCN-O1-NEXT: Lazy Block Frequency Analysis +; GCN-O1-NEXT: Expand memcmp() to load/stores +; GCN-O1-NEXT: Lower Garbage Collection Instructions +; GCN-O1-NEXT: Shadow Stack GC Lowering +; GCN-O1-NEXT: Lower constant intrinsics +; GCN-O1-NEXT: Remove unreachable blocks from the CFG +; GCN-O1-NEXT: Natural Loop Information +; GCN-O1-NEXT: Post-Dominator Tree Construction +; GCN-O1-NEXT: Branch Probability Analysis +; GCN-O1-NEXT: Block Frequency Analysis +; GCN-O1-NEXT: Constant Hoisting +; GCN-O1-NEXT: Replace intrinsics with calls to vector library +; GCN-O1-NEXT: Partially inline calls to library functions +; GCN-O1-NEXT: Scalarize Masked Memory Intrinsics +; GCN-O1-NEXT: Expand reduction intrinsics +; GCN-O1-NEXT: CallGraph Construction +; GCN-O1-NEXT: Call Graph SCC Pass Manager +; GCN-O1-NEXT: AMDGPU Annotate Kernel Features +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: AMDGPU Lower Kernel Arguments +; GCN-O1-NEXT: Analysis if a function is memory bound +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Natural Loop Information +; GCN-O1-NEXT: CodeGen Prepare +; GCN-O1-NEXT: Rewrite Symbols +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Lazy Value Information Analysis +; GCN-O1-NEXT: Lower SwitchInst's to branches +; GCN-O1-NEXT: Lower invoke and unwind, for unwindless code generators +; GCN-O1-NEXT: Remove unreachable blocks from the CFG +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O1-NEXT: Function Alias Analysis Results +; GCN-O1-NEXT: Flatten the CFG +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Post-Dominator Tree Construction +; GCN-O1-NEXT: Natural Loop Information +; GCN-O1-NEXT: Legacy Divergence Analysis +; GCN-O1-NEXT: AMDGPU IR late optimizations +; GCN-O1-NEXT: Unify divergent function exit nodes +; GCN-O1-NEXT: Lazy Value Information Analysis +; GCN-O1-NEXT: Lower SwitchInst's to branches +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Natural Loop Information +; GCN-O1-NEXT: Convert irreducible control-flow into natural loops +; GCN-O1-NEXT: Fixup each natural loop to have a single exit block +; GCN-O1-NEXT: Post-Dominator Tree Construction +; GCN-O1-NEXT: Dominance Frontier Construction +; GCN-O1-NEXT: Detect single entry single exit regions +; GCN-O1-NEXT: Region Pass Manager +; GCN-O1-NEXT: Structurize control flow +; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O1-NEXT: Function Alias Analysis Results +; GCN-O1-NEXT: Natural Loop Information +; GCN-O1-NEXT: Code sinking +; GCN-O1-NEXT: Post-Dominator Tree Construction +; GCN-O1-NEXT: Legacy Divergence Analysis +; GCN-O1-NEXT: Phi Values Analysis +; GCN-O1-NEXT: Function Alias Analysis Results +; GCN-O1-NEXT: Memory Dependence Analysis +; GCN-O1-NEXT: AMDGPU Annotate Uniform Values +; GCN-O1-NEXT: SI annotate control flow +; GCN-O1-NEXT: Natural Loop Information +; GCN-O1-NEXT: LCSSA Verifier +; GCN-O1-NEXT: Loop-Closed SSA Form Pass +; GCN-O1-NEXT: CallGraph Construction +; GCN-O1-NEXT: Call Graph SCC Pass Manager +; GCN-O1-NEXT: DummyCGSCCPass +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Safe Stack instrumentation pass +; GCN-O1-NEXT: Insert stack protectors +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Post-Dominator Tree Construction +; GCN-O1-NEXT: Natural Loop Information +; GCN-O1-NEXT: Legacy Divergence Analysis +; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O1-NEXT: Function Alias Analysis Results +; GCN-O1-NEXT: Branch Probability Analysis +; GCN-O1-NEXT: Lazy Branch Probability Analysis +; GCN-O1-NEXT: Lazy Block Frequency Analysis +; GCN-O1-NEXT: AMDGPU DAG->DAG Pattern Instruction Selection +; GCN-O1-NEXT: MachineDominator Tree Construction +; GCN-O1-NEXT: SI Fix SGPR copies +; GCN-O1-NEXT: MachinePostDominator Tree Construction +; GCN-O1-NEXT: SI Lower i1 Copies +; GCN-O1-NEXT: Finalize ISel and expand pseudo-instructions +; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O1-NEXT: Early Tail Duplication +; GCN-O1-NEXT: Optimize machine instruction PHIs +; GCN-O1-NEXT: Slot index numbering +; GCN-O1-NEXT: Merge disjoint stack slots +; GCN-O1-NEXT: Local Stack Slot Allocation +; GCN-O1-NEXT: Remove dead machine instructions +; GCN-O1-NEXT: MachineDominator Tree Construction +; GCN-O1-NEXT: Machine Natural Loop Construction +; GCN-O1-NEXT: Machine Block Frequency Analysis +; GCN-O1-NEXT: Early Machine Loop Invariant Code Motion +; GCN-O1-NEXT: MachineDominator Tree Construction +; GCN-O1-NEXT: Machine Block Frequency Analysis +; GCN-O1-NEXT: Machine Common Subexpression Elimination +; GCN-O1-NEXT: MachinePostDominator Tree Construction +; GCN-O1-NEXT: Machine code sinking +; GCN-O1-NEXT: Peephole Optimizations +; GCN-O1-NEXT: Remove dead machine instructions +; GCN-O1-NEXT: SI Fold Operands +; GCN-O1-NEXT: GCN DPP Combine +; GCN-O1-NEXT: SI Load Store Optimizer +; GCN-O1-NEXT: Remove dead machine instructions +; GCN-O1-NEXT: SI Shrink Instructions +; GCN-O1-NEXT: Register Usage Information Propagation +; GCN-O1-NEXT: Detect Dead Lanes +; GCN-O1-NEXT: Remove dead machine instructions +; GCN-O1-NEXT: Process Implicit Definitions +; GCN-O1-NEXT: Remove unreachable machine basic blocks +; GCN-O1-NEXT: Live Variable Analysis +; GCN-O1-NEXT: Eliminate PHI nodes for register allocation +; GCN-O1-NEXT: SI Lower control flow pseudo instructions +; GCN-O1-NEXT: Two-Address instruction pass +; GCN-O1-NEXT: MachineDominator Tree Construction +; GCN-O1-NEXT: Slot index numbering +; GCN-O1-NEXT: Live Interval Analysis +; GCN-O1-NEXT: Machine Natural Loop Construction +; GCN-O1-NEXT: Simple Register Coalescing +; GCN-O1-NEXT: Rename Disconnected Subregister Components +; GCN-O1-NEXT: Machine Instruction Scheduler +; GCN-O1-NEXT: MachinePostDominator Tree Construction +; GCN-O1-NEXT: SI Whole Quad Mode +; GCN-O1-NEXT: Virtual Register Map +; GCN-O1-NEXT: Live Register Matrix +; GCN-O1-NEXT: SI Pre-allocate WWM Registers +; GCN-O1-NEXT: SI optimize exec mask operations pre-RA +; GCN-O1-NEXT: SI Form memory clauses +; GCN-O1-NEXT: Machine Natural Loop Construction +; GCN-O1-NEXT: Machine Block Frequency Analysis +; GCN-O1-NEXT: Debug Variable Analysis +; GCN-O1-NEXT: Live Stack Slot Analysis +; GCN-O1-NEXT: Virtual Register Map +; GCN-O1-NEXT: Live Register Matrix +; GCN-O1-NEXT: Bundle Machine CFG Edges +; GCN-O1-NEXT: Spill Code Placement Analysis +; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O1-NEXT: Machine Optimization Remark Emitter +; GCN-O1-NEXT: Greedy Register Allocator +; GCN-O1-NEXT: GCN NSA Reassign +; GCN-O1-NEXT: Virtual Register Rewriter +; GCN-O1-NEXT: Stack Slot Coloring +; GCN-O1-NEXT: Machine Copy Propagation Pass +; GCN-O1-NEXT: Machine Loop Invariant Code Motion +; GCN-O1-NEXT: SI Fix VGPR copies +; GCN-O1-NEXT: SI optimize exec mask operations +; GCN-O1-NEXT: SI lower SGPR spill instructions +; GCN-O1-NEXT: Fixup Statepoint Caller Saved +; GCN-O1-NEXT: PostRA Machine Sink +; GCN-O1-NEXT: MachineDominator Tree Construction +; GCN-O1-NEXT: Machine Natural Loop Construction +; GCN-O1-NEXT: Machine Block Frequency Analysis +; GCN-O1-NEXT: MachinePostDominator Tree Construction +; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O1-NEXT: Machine Optimization Remark Emitter +; GCN-O1-NEXT: Shrink Wrapping analysis +; GCN-O1-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; GCN-O1-NEXT: Control Flow Optimizer +; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O1-NEXT: Tail Duplication +; GCN-O1-NEXT: Machine Copy Propagation Pass +; GCN-O1-NEXT: Post-RA pseudo instruction expansion pass +; GCN-O1-NEXT: SI post-RA bundler +; GCN-O1-NEXT: MachineDominator Tree Construction +; GCN-O1-NEXT: Machine Natural Loop Construction +; GCN-O1-NEXT: Post RA top-down list latency scheduler +; GCN-O1-NEXT: Machine Block Frequency Analysis +; GCN-O1-NEXT: MachinePostDominator Tree Construction +; GCN-O1-NEXT: Branch Probability Basic Block Placement +; GCN-O1-NEXT: Insert fentry calls +; GCN-O1-NEXT: Insert XRay ops +; GCN-O1-NEXT: SI Memory Legalizer +; GCN-O1-NEXT: MachinePostDominator Tree Construction +; GCN-O1-NEXT: SI insert wait instructions +; GCN-O1-NEXT: SI Shrink Instructions +; GCN-O1-NEXT: Insert required mode register values +; GCN-O1-NEXT: SI Insert Hard Clauses +; GCN-O1-NEXT: MachineDominator Tree Construction +; GCN-O1-NEXT: SI Final Branch Preparation +; GCN-O1-NEXT: SI peephole optimizations +; GCN-O1-NEXT: Post RA hazard recognizer +; GCN-O1-NEXT: Branch relaxation pass +; GCN-O1-NEXT: Register Usage Information Collector Pass +; GCN-O1-NEXT: Live DEBUG_VALUE analysis +; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O1-NEXT: Machine Optimization Remark Emitter +; GCN-O1-NEXT: AMDGPU Assembly Printer +; GCN-O1-NEXT: Free MachineFunction +; GCN-O1-NEXT: Pass Arguments: -domtree +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Dominator Tree Construction + +; GCN-O2: Pass Arguments: -targetlibinfo -targetpassconfig -machinemoduleinfo -tti -assumption-cache-tracker -profile-summary-info -amdgpu-aa -external-aa -tbaa -scoped-noalias-aa -collector-metadata -amdgpu-argument-reg-usage-info -machine-branch-prob -reg-usage-info -pre-isel-intrinsic-lowering -amdgpu-printf-runtime-binding -amdgpu-fix-function-bitcasts -amdgpu-propagate-attributes-early -atomic-expand -amdgpu-lower-intrinsics -amdgpu-always-inline -basiccg -always-inline -barrier -amdgpu-lower-enqueued-block -amdgpu-lower-module-lds -infer-address-spaces -amdgpu-promote-alloca -domtree -sroa -basic-aa -aa -memoryssa -loops -loop-simplify -lcssa-verification -lcssa -scalar-evolution -lazy-branch-prob -lazy-block-freq -licm -separate-const-offset-from-gep -speculative-execution -scalar-evolution -slsr -early-cse -scalar-evolution -nary-reassociate -early-cse -postdomtree -divergence -amdgpu-codegenprepare -loop-simplify -scalar-evolution -canon-freeze -iv-users -loop-reduce -basic-aa -aa -mergeicmps -loops -lazy-branch-prob -lazy-block-freq -expandmemcmp -gc-lowering -shadow-stack-gc-lowering -lower-constant-intrinsics -unreachableblockelim -loops -postdomtree -branch-prob -block-freq -consthoist -replace-with-veclib -partially-inline-libcalls -scalarize-masked-mem-intrin -expand-reductions -early-cse -basiccg -amdgpu-annotate-kernel-features -amdgpu-lower-kernel-arguments -amdgpu-perf-hint -domtree -loops -codegenprepare -rewrite-symbols -domtree -basic-aa -aa -loops -scalar-evolution -load-store-vectorizer -lazy-value-info -lowerswitch -lowerinvoke -unreachableblockelim -domtree -basic-aa -aa -flattencfg -domtree -postdomtree -loops -divergence -amdgpu-late-codegenprepare -amdgpu-unify-divergent-exit-nodes -lazy-value-info -lowerswitch -domtree -loops -fix-irreducible -unify-loop-exits -postdomtree -domfrontier -regions -structurizecfg -basic-aa -aa -loops -sink -postdomtree -divergence -phi-values -aa -memdep -amdgpu-annotate-uniform -si-annotate-control-flow -loops -lcssa-verification -lcssa -basiccg -DummyCGSCCPass -safe-stack -stack-protector -domtree -postdomtree -loops -divergence -basic-aa -aa -branch-prob -lazy-branch-prob -lazy-block-freq -amdgpu-isel -machinedomtree -si-fix-sgpr-copies -machinepostdomtree -si-i1-copies -finalize-isel -lazy-machine-block-freq -early-tailduplication -opt-phis -slotindexes -stack-coloring -localstackalloc -dead-mi-elimination -machinedomtree -machine-loops -machine-block-freq -early-machinelicm -machinedomtree -machine-block-freq -machine-cse -machinepostdomtree -machine-sink -peephole-opt -dead-mi-elimination -si-fold-operands -gcn-dpp-combine -si-load-store-opt -si-peephole-sdwa -machine-block-freq -machinedomtree -early-machinelicm -machinedomtree -machine-block-freq -machine-cse -si-fold-operands -dead-mi-elimination -si-shrink-instructions -reg-usage-propagation -detect-dead-lanes -dead-mi-elimination -processimpdefs -unreachable-mbb-elimination -livevars -phi-node-elimination -si-lower-control-flow -twoaddressinstruction -machinedomtree -slotindexes -liveintervals -machine-loops -simple-register-coalescing -rename-independent-subregs -machine-scheduler -machinepostdomtree -si-wqm -virtregmap -liveregmatrix -si-pre-allocate-wwm-regs -si-optimize-exec-masking-pre-ra -si-form-memory-clauses -machine-loops -machine-block-freq -livedebugvars -livestacks -virtregmap -liveregmatrix -edge-bundles -spill-code-placement -lazy-machine-block-freq -machine-opt-remark-emitter -greedy -amdgpu-nsa-reassign -virtregrewriter -stack-slot-coloring -machine-cp -machinelicm -si-fix-vgpr-copies -si-optimize-exec-masking -si-lower-sgpr-spills -fixup-statepoint-caller-saved -postra-machine-sink -machinedomtree -machine-loops -machine-block-freq -machinepostdomtree -lazy-machine-block-freq -machine-opt-remark-emitter -shrink-wrap -prologepilog -branch-folder -lazy-machine-block-freq -tailduplication -machine-cp -postrapseudos -si-post-ra-bundler -machinedomtree -machine-loops -post-RA-sched -machine-block-freq -machinepostdomtree -block-placement -fentry-insert -xray-instrumentation -si-memory-legalizer -machinepostdomtree -si-insert-waitcnts -si-shrink-instructions -si-mode-register -si-insert-hard-clauses -machinedomtree -si-late-branch-lowering -si-pre-emit-peephole -post-RA-hazard-rec -branch-relaxation -RegUsageInfoCollector -livedebugvalues -lazy-machine-block-freq -machine-opt-remark-emitter +; GCN-O2-NEXT: Target Library Information +; GCN-O2-NEXT: Target Pass Configuration +; GCN-O2-NEXT: Machine Module Information +; GCN-O2-NEXT: Target Transform Information +; GCN-O2-NEXT: Assumption Cache Tracker +; GCN-O2-NEXT: Profile summary info +; GCN-O2-NEXT: AMDGPU Address space based Alias Analysis +; GCN-O2-NEXT: External Alias Analysis +; GCN-O2-NEXT: Type-Based Alias Analysis +; GCN-O2-NEXT: Scoped NoAlias Alias Analysis +; GCN-O2-NEXT: Create Garbage Collector Module Metadata +; GCN-O2-NEXT: Argument Register Usage Information Storage +; GCN-O2-NEXT: Machine Branch Probability Analysis +; GCN-O2-NEXT: Register Usage Information Storage +; GCN-O2-NEXT: ModulePass Manager +; GCN-O2-NEXT: Pre-ISel Intrinsic Lowering +; GCN-O2-NEXT: AMDGPU Printf lowering +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Fix function bitcasts for AMDGPU +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Early propagate attributes from kernels to functions +; GCN-O2-NEXT: Expand Atomic instructions +; GCN-O2-NEXT: AMDGPU Lower Intrinsics +; GCN-O2-NEXT: AMDGPU Inline All Functions +; GCN-O2-NEXT: CallGraph Construction +; GCN-O2-NEXT: Call Graph SCC Pass Manager +; GCN-O2-NEXT: Inliner for always_inline functions +; GCN-O2-NEXT: A No-Op Barrier Pass +; GCN-O2-NEXT: Lower OpenCL enqueued blocks +; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Infer address spaces +; GCN-O2-NEXT: AMDGPU Promote Alloca +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: SROA +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O2-NEXT: Function Alias Analysis Results +; GCN-O2-NEXT: Memory SSA +; GCN-O2-NEXT: Natural Loop Information +; GCN-O2-NEXT: Canonicalize natural loops +; GCN-O2-NEXT: LCSSA Verifier +; GCN-O2-NEXT: Loop-Closed SSA Form Pass +; GCN-O2-NEXT: Scalar Evolution Analysis +; GCN-O2-NEXT: Lazy Branch Probability Analysis +; GCN-O2-NEXT: Lazy Block Frequency Analysis +; GCN-O2-NEXT: Loop Pass Manager +; GCN-O2-NEXT: Loop Invariant Code Motion +; GCN-O2-NEXT: Split GEPs to a variadic base and a constant offset for better CSE +; GCN-O2-NEXT: Speculatively execute instructions +; GCN-O2-NEXT: Scalar Evolution Analysis +; GCN-O2-NEXT: Straight line strength reduction +; GCN-O2-NEXT: Early CSE +; GCN-O2-NEXT: Scalar Evolution Analysis +; GCN-O2-NEXT: Nary reassociation +; GCN-O2-NEXT: Early CSE +; GCN-O2-NEXT: Post-Dominator Tree Construction +; GCN-O2-NEXT: Legacy Divergence Analysis +; GCN-O2-NEXT: AMDGPU IR optimizations +; GCN-O2-NEXT: Canonicalize natural loops +; GCN-O2-NEXT: Scalar Evolution Analysis +; GCN-O2-NEXT: Loop Pass Manager +; GCN-O2-NEXT: Canonicalize Freeze Instructions in Loops +; GCN-O2-NEXT: Induction Variable Users +; GCN-O2-NEXT: Loop Strength Reduction +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O2-NEXT: Function Alias Analysis Results +; GCN-O2-NEXT: Merge contiguous icmps into a memcmp +; GCN-O2-NEXT: Natural Loop Information +; GCN-O2-NEXT: Lazy Branch Probability Analysis +; GCN-O2-NEXT: Lazy Block Frequency Analysis +; GCN-O2-NEXT: Expand memcmp() to load/stores +; GCN-O2-NEXT: Lower Garbage Collection Instructions +; GCN-O2-NEXT: Shadow Stack GC Lowering +; GCN-O2-NEXT: Lower constant intrinsics +; GCN-O2-NEXT: Remove unreachable blocks from the CFG +; GCN-O2-NEXT: Natural Loop Information +; GCN-O2-NEXT: Post-Dominator Tree Construction +; GCN-O2-NEXT: Branch Probability Analysis +; GCN-O2-NEXT: Block Frequency Analysis +; GCN-O2-NEXT: Constant Hoisting +; GCN-O2-NEXT: Replace intrinsics with calls to vector library +; GCN-O2-NEXT: Partially inline calls to library functions +; GCN-O2-NEXT: Scalarize Masked Memory Intrinsics +; GCN-O2-NEXT: Expand reduction intrinsics +; GCN-O2-NEXT: Early CSE +; GCN-O2-NEXT: CallGraph Construction +; GCN-O2-NEXT: Call Graph SCC Pass Manager +; GCN-O2-NEXT: AMDGPU Annotate Kernel Features +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: AMDGPU Lower Kernel Arguments +; GCN-O2-NEXT: Analysis if a function is memory bound +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Natural Loop Information +; GCN-O2-NEXT: CodeGen Prepare +; GCN-O2-NEXT: Rewrite Symbols +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O2-NEXT: Function Alias Analysis Results +; GCN-O2-NEXT: Natural Loop Information +; GCN-O2-NEXT: Scalar Evolution Analysis +; GCN-O2-NEXT: GPU Load and Store Vectorizer +; GCN-O2-NEXT: Lazy Value Information Analysis +; GCN-O2-NEXT: Lower SwitchInst's to branches +; GCN-O2-NEXT: Lower invoke and unwind, for unwindless code generators +; GCN-O2-NEXT: Remove unreachable blocks from the CFG +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O2-NEXT: Function Alias Analysis Results +; GCN-O2-NEXT: Flatten the CFG +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Post-Dominator Tree Construction +; GCN-O2-NEXT: Natural Loop Information +; GCN-O2-NEXT: Legacy Divergence Analysis +; GCN-O2-NEXT: AMDGPU IR late optimizations +; GCN-O2-NEXT: Unify divergent function exit nodes +; GCN-O2-NEXT: Lazy Value Information Analysis +; GCN-O2-NEXT: Lower SwitchInst's to branches +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Natural Loop Information +; GCN-O2-NEXT: Convert irreducible control-flow into natural loops +; GCN-O2-NEXT: Fixup each natural loop to have a single exit block +; GCN-O2-NEXT: Post-Dominator Tree Construction +; GCN-O2-NEXT: Dominance Frontier Construction +; GCN-O2-NEXT: Detect single entry single exit regions +; GCN-O2-NEXT: Region Pass Manager +; GCN-O2-NEXT: Structurize control flow +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O2-NEXT: Function Alias Analysis Results +; GCN-O2-NEXT: Natural Loop Information +; GCN-O2-NEXT: Code sinking +; GCN-O2-NEXT: Post-Dominator Tree Construction +; GCN-O2-NEXT: Legacy Divergence Analysis +; GCN-O2-NEXT: Phi Values Analysis +; GCN-O2-NEXT: Function Alias Analysis Results +; GCN-O2-NEXT: Memory Dependence Analysis +; GCN-O2-NEXT: AMDGPU Annotate Uniform Values +; GCN-O2-NEXT: SI annotate control flow +; GCN-O2-NEXT: Natural Loop Information +; GCN-O2-NEXT: LCSSA Verifier +; GCN-O2-NEXT: Loop-Closed SSA Form Pass +; GCN-O2-NEXT: CallGraph Construction +; GCN-O2-NEXT: Call Graph SCC Pass Manager +; GCN-O2-NEXT: DummyCGSCCPass +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Safe Stack instrumentation pass +; GCN-O2-NEXT: Insert stack protectors +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Post-Dominator Tree Construction +; GCN-O2-NEXT: Natural Loop Information +; GCN-O2-NEXT: Legacy Divergence Analysis +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O2-NEXT: Function Alias Analysis Results +; GCN-O2-NEXT: Branch Probability Analysis +; GCN-O2-NEXT: Lazy Branch Probability Analysis +; GCN-O2-NEXT: Lazy Block Frequency Analysis +; GCN-O2-NEXT: AMDGPU DAG->DAG Pattern Instruction Selection +; GCN-O2-NEXT: MachineDominator Tree Construction +; GCN-O2-NEXT: SI Fix SGPR copies +; GCN-O2-NEXT: MachinePostDominator Tree Construction +; GCN-O2-NEXT: SI Lower i1 Copies +; GCN-O2-NEXT: Finalize ISel and expand pseudo-instructions +; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O2-NEXT: Early Tail Duplication +; GCN-O2-NEXT: Optimize machine instruction PHIs +; GCN-O2-NEXT: Slot index numbering +; GCN-O2-NEXT: Merge disjoint stack slots +; GCN-O2-NEXT: Local Stack Slot Allocation +; GCN-O2-NEXT: Remove dead machine instructions +; GCN-O2-NEXT: MachineDominator Tree Construction +; GCN-O2-NEXT: Machine Natural Loop Construction +; GCN-O2-NEXT: Machine Block Frequency Analysis +; GCN-O2-NEXT: Early Machine Loop Invariant Code Motion +; GCN-O2-NEXT: MachineDominator Tree Construction +; GCN-O2-NEXT: Machine Block Frequency Analysis +; GCN-O2-NEXT: Machine Common Subexpression Elimination +; GCN-O2-NEXT: MachinePostDominator Tree Construction +; GCN-O2-NEXT: Machine code sinking +; GCN-O2-NEXT: Peephole Optimizations +; GCN-O2-NEXT: Remove dead machine instructions +; GCN-O2-NEXT: SI Fold Operands +; GCN-O2-NEXT: GCN DPP Combine +; GCN-O2-NEXT: SI Load Store Optimizer +; GCN-O2-NEXT: SI Peephole SDWA +; GCN-O2-NEXT: Machine Block Frequency Analysis +; GCN-O2-NEXT: MachineDominator Tree Construction +; GCN-O2-NEXT: Early Machine Loop Invariant Code Motion +; GCN-O2-NEXT: MachineDominator Tree Construction +; GCN-O2-NEXT: Machine Block Frequency Analysis +; GCN-O2-NEXT: Machine Common Subexpression Elimination +; GCN-O2-NEXT: SI Fold Operands +; GCN-O2-NEXT: Remove dead machine instructions +; GCN-O2-NEXT: SI Shrink Instructions +; GCN-O2-NEXT: Register Usage Information Propagation +; GCN-O2-NEXT: Detect Dead Lanes +; GCN-O2-NEXT: Remove dead machine instructions +; GCN-O2-NEXT: Process Implicit Definitions +; GCN-O2-NEXT: Remove unreachable machine basic blocks +; GCN-O2-NEXT: Live Variable Analysis +; GCN-O2-NEXT: Eliminate PHI nodes for register allocation +; GCN-O2-NEXT: SI Lower control flow pseudo instructions +; GCN-O2-NEXT: Two-Address instruction pass +; GCN-O2-NEXT: MachineDominator Tree Construction +; GCN-O2-NEXT: Slot index numbering +; GCN-O2-NEXT: Live Interval Analysis +; GCN-O2-NEXT: Machine Natural Loop Construction +; GCN-O2-NEXT: Simple Register Coalescing +; GCN-O2-NEXT: Rename Disconnected Subregister Components +; GCN-O2-NEXT: Machine Instruction Scheduler +; GCN-O2-NEXT: MachinePostDominator Tree Construction +; GCN-O2-NEXT: SI Whole Quad Mode +; GCN-O2-NEXT: Virtual Register Map +; GCN-O2-NEXT: Live Register Matrix +; GCN-O2-NEXT: SI Pre-allocate WWM Registers +; GCN-O2-NEXT: SI optimize exec mask operations pre-RA +; GCN-O2-NEXT: SI Form memory clauses +; GCN-O2-NEXT: Machine Natural Loop Construction +; GCN-O2-NEXT: Machine Block Frequency Analysis +; GCN-O2-NEXT: Debug Variable Analysis +; GCN-O2-NEXT: Live Stack Slot Analysis +; GCN-O2-NEXT: Virtual Register Map +; GCN-O2-NEXT: Live Register Matrix +; GCN-O2-NEXT: Bundle Machine CFG Edges +; GCN-O2-NEXT: Spill Code Placement Analysis +; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O2-NEXT: Machine Optimization Remark Emitter +; GCN-O2-NEXT: Greedy Register Allocator +; GCN-O2-NEXT: GCN NSA Reassign +; GCN-O2-NEXT: Virtual Register Rewriter +; GCN-O2-NEXT: Stack Slot Coloring +; GCN-O2-NEXT: Machine Copy Propagation Pass +; GCN-O2-NEXT: Machine Loop Invariant Code Motion +; GCN-O2-NEXT: SI Fix VGPR copies +; GCN-O2-NEXT: SI optimize exec mask operations +; GCN-O2-NEXT: SI lower SGPR spill instructions +; GCN-O2-NEXT: Fixup Statepoint Caller Saved +; GCN-O2-NEXT: PostRA Machine Sink +; GCN-O2-NEXT: MachineDominator Tree Construction +; GCN-O2-NEXT: Machine Natural Loop Construction +; GCN-O2-NEXT: Machine Block Frequency Analysis +; GCN-O2-NEXT: MachinePostDominator Tree Construction +; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O2-NEXT: Machine Optimization Remark Emitter +; GCN-O2-NEXT: Shrink Wrapping analysis +; GCN-O2-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; GCN-O2-NEXT: Control Flow Optimizer +; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O2-NEXT: Tail Duplication +; GCN-O2-NEXT: Machine Copy Propagation Pass +; GCN-O2-NEXT: Post-RA pseudo instruction expansion pass +; GCN-O2-NEXT: SI post-RA bundler +; GCN-O2-NEXT: MachineDominator Tree Construction +; GCN-O2-NEXT: Machine Natural Loop Construction +; GCN-O2-NEXT: Post RA top-down list latency scheduler +; GCN-O2-NEXT: Machine Block Frequency Analysis +; GCN-O2-NEXT: MachinePostDominator Tree Construction +; GCN-O2-NEXT: Branch Probability Basic Block Placement +; GCN-O2-NEXT: Insert fentry calls +; GCN-O2-NEXT: Insert XRay ops +; GCN-O2-NEXT: SI Memory Legalizer +; GCN-O2-NEXT: MachinePostDominator Tree Construction +; GCN-O2-NEXT: SI insert wait instructions +; GCN-O2-NEXT: SI Shrink Instructions +; GCN-O2-NEXT: Insert required mode register values +; GCN-O2-NEXT: SI Insert Hard Clauses +; GCN-O2-NEXT: MachineDominator Tree Construction +; GCN-O2-NEXT: SI Final Branch Preparation +; GCN-O2-NEXT: SI peephole optimizations +; GCN-O2-NEXT: Post RA hazard recognizer +; GCN-O2-NEXT: Branch relaxation pass +; GCN-O2-NEXT: Register Usage Information Collector Pass +; GCN-O2-NEXT: Live DEBUG_VALUE analysis +; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O2-NEXT: Machine Optimization Remark Emitter +; GCN-O2-NEXT: AMDGPU Assembly Printer +; GCN-O2-NEXT: Free MachineFunction +; GCN-O2-NEXT: Pass Arguments: -domtree +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Dominator Tree Construction + +; GCN-O3: Pass Arguments: -targetlibinfo -targetpassconfig -machinemoduleinfo -tti -assumption-cache-tracker -profile-summary-info -amdgpu-aa -external-aa -tbaa -scoped-noalias-aa -collector-metadata -amdgpu-argument-reg-usage-info -machine-branch-prob -reg-usage-info -pre-isel-intrinsic-lowering -amdgpu-printf-runtime-binding -amdgpu-fix-function-bitcasts -amdgpu-propagate-attributes-early -atomic-expand -amdgpu-lower-intrinsics -amdgpu-always-inline -basiccg -always-inline -barrier -amdgpu-lower-enqueued-block -amdgpu-lower-module-lds -infer-address-spaces -amdgpu-promote-alloca -domtree -sroa -basic-aa -aa -memoryssa -loops -loop-simplify -lcssa-verification -lcssa -scalar-evolution -lazy-branch-prob -lazy-block-freq -licm -separate-const-offset-from-gep -speculative-execution -scalar-evolution -slsr -phi-values -aa -memdep -opt-remark-emitter -gvn -scalar-evolution -nary-reassociate -early-cse -postdomtree -divergence -amdgpu-codegenprepare -basic-aa -loop-simplify -scalar-evolution -canon-freeze -iv-users -loop-reduce -basic-aa -aa -mergeicmps -loops -lazy-branch-prob -lazy-block-freq -expandmemcmp -gc-lowering -shadow-stack-gc-lowering -lower-constant-intrinsics -unreachableblockelim -loops -postdomtree -branch-prob -block-freq -consthoist -replace-with-veclib -partially-inline-libcalls -scalarize-masked-mem-intrin -expand-reductions -loops -phi-values -basic-aa -aa -memdep -lazy-branch-prob -lazy-block-freq -opt-remark-emitter -gvn -basiccg -amdgpu-annotate-kernel-features -amdgpu-lower-kernel-arguments -amdgpu-perf-hint -domtree -loops -codegenprepare -rewrite-symbols -domtree -basic-aa -aa -loops -scalar-evolution -load-store-vectorizer -lazy-value-info -lowerswitch -lowerinvoke -unreachableblockelim -domtree -basic-aa -aa -flattencfg -domtree -postdomtree -loops -divergence -amdgpu-late-codegenprepare -amdgpu-unify-divergent-exit-nodes -lazy-value-info -lowerswitch -domtree -loops -fix-irreducible -unify-loop-exits -postdomtree -domfrontier -regions -structurizecfg -basic-aa -aa -loops -sink -postdomtree -divergence -phi-values -aa -memdep -amdgpu-annotate-uniform -si-annotate-control-flow -loops -lcssa-verification -lcssa -basiccg -DummyCGSCCPass -safe-stack -stack-protector -domtree -postdomtree -loops -divergence -basic-aa -aa -branch-prob -lazy-branch-prob -lazy-block-freq -amdgpu-isel -machinedomtree -si-fix-sgpr-copies -machinepostdomtree -si-i1-copies -finalize-isel -lazy-machine-block-freq -early-tailduplication -opt-phis -slotindexes -stack-coloring -localstackalloc -dead-mi-elimination -machinedomtree -machine-loops -machine-block-freq -early-machinelicm -machinedomtree -machine-block-freq -machine-cse -machinepostdomtree -machine-sink -peephole-opt -dead-mi-elimination -si-fold-operands -gcn-dpp-combine -si-load-store-opt -si-peephole-sdwa -machine-block-freq -machinedomtree -early-machinelicm -machinedomtree -machine-block-freq -machine-cse -si-fold-operands -dead-mi-elimination -si-shrink-instructions -reg-usage-propagation -detect-dead-lanes -dead-mi-elimination -processimpdefs -unreachable-mbb-elimination -livevars -phi-node-elimination -si-lower-control-flow -twoaddressinstruction -machinedomtree -slotindexes -liveintervals -machine-loops -simple-register-coalescing -rename-independent-subregs -machine-scheduler -machinepostdomtree -si-wqm -virtregmap -liveregmatrix -si-pre-allocate-wwm-regs -si-optimize-exec-masking-pre-ra -si-form-memory-clauses -machine-loops -machine-block-freq -livedebugvars -livestacks -virtregmap -liveregmatrix -edge-bundles -spill-code-placement -lazy-machine-block-freq -machine-opt-remark-emitter -greedy -amdgpu-nsa-reassign -virtregrewriter -stack-slot-coloring -machine-cp -machinelicm -si-fix-vgpr-copies -si-optimize-exec-masking -si-lower-sgpr-spills -fixup-statepoint-caller-saved -postra-machine-sink -machinedomtree -machine-loops -machine-block-freq -machinepostdomtree -lazy-machine-block-freq -machine-opt-remark-emitter -shrink-wrap -prologepilog -branch-folder -lazy-machine-block-freq -tailduplication -machine-cp -postrapseudos -si-post-ra-bundler -machinedomtree -machine-loops -post-RA-sched -machine-block-freq -machinepostdomtree -block-placement -fentry-insert -xray-instrumentation -si-memory-legalizer -machinepostdomtree -si-insert-waitcnts -si-shrink-instructions -si-mode-register -si-insert-hard-clauses -machinedomtree -si-late-branch-lowering -si-pre-emit-peephole -post-RA-hazard-rec -branch-relaxation -RegUsageInfoCollector -livedebugvalues -lazy-machine-block-freq -machine-opt-remark-emitter +; GCN-O3-NEXT: Target Library Information +; GCN-O3-NEXT: Target Pass Configuration +; GCN-O3-NEXT: Machine Module Information +; GCN-O3-NEXT: Target Transform Information +; GCN-O3-NEXT: Assumption Cache Tracker +; GCN-O3-NEXT: Profile summary info +; GCN-O3-NEXT: AMDGPU Address space based Alias Analysis +; GCN-O3-NEXT: External Alias Analysis +; GCN-O3-NEXT: Type-Based Alias Analysis +; GCN-O3-NEXT: Scoped NoAlias Alias Analysis +; GCN-O3-NEXT: Create Garbage Collector Module Metadata +; GCN-O3-NEXT: Argument Register Usage Information Storage +; GCN-O3-NEXT: Machine Branch Probability Analysis +; GCN-O3-NEXT: Register Usage Information Storage +; GCN-O3-NEXT: ModulePass Manager +; GCN-O3-NEXT: Pre-ISel Intrinsic Lowering +; GCN-O3-NEXT: AMDGPU Printf lowering +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Fix function bitcasts for AMDGPU +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Early propagate attributes from kernels to functions +; GCN-O3-NEXT: Expand Atomic instructions +; GCN-O3-NEXT: AMDGPU Lower Intrinsics +; GCN-O3-NEXT: AMDGPU Inline All Functions +; GCN-O3-NEXT: CallGraph Construction +; GCN-O3-NEXT: Call Graph SCC Pass Manager +; GCN-O3-NEXT: Inliner for always_inline functions +; GCN-O3-NEXT: A No-Op Barrier Pass +; GCN-O3-NEXT: Lower OpenCL enqueued blocks +; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Infer address spaces +; GCN-O3-NEXT: AMDGPU Promote Alloca +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: SROA +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Memory SSA +; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: Canonicalize natural loops +; GCN-O3-NEXT: LCSSA Verifier +; GCN-O3-NEXT: Loop-Closed SSA Form Pass +; GCN-O3-NEXT: Scalar Evolution Analysis +; GCN-O3-NEXT: Lazy Branch Probability Analysis +; GCN-O3-NEXT: Lazy Block Frequency Analysis +; GCN-O3-NEXT: Loop Pass Manager +; GCN-O3-NEXT: Loop Invariant Code Motion +; GCN-O3-NEXT: Split GEPs to a variadic base and a constant offset for better CSE +; GCN-O3-NEXT: Speculatively execute instructions +; GCN-O3-NEXT: Scalar Evolution Analysis +; GCN-O3-NEXT: Straight line strength reduction +; GCN-O3-NEXT: Phi Values Analysis +; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Memory Dependence Analysis +; GCN-O3-NEXT: Optimization Remark Emitter +; GCN-O3-NEXT: Global Value Numbering +; GCN-O3-NEXT: Scalar Evolution Analysis +; GCN-O3-NEXT: Nary reassociation +; GCN-O3-NEXT: Early CSE +; GCN-O3-NEXT: Post-Dominator Tree Construction +; GCN-O3-NEXT: Legacy Divergence Analysis +; GCN-O3-NEXT: AMDGPU IR optimizations +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O3-NEXT: Canonicalize natural loops +; GCN-O3-NEXT: Scalar Evolution Analysis +; GCN-O3-NEXT: Loop Pass Manager +; GCN-O3-NEXT: Canonicalize Freeze Instructions in Loops +; GCN-O3-NEXT: Induction Variable Users +; GCN-O3-NEXT: Loop Strength Reduction +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Merge contiguous icmps into a memcmp +; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: Lazy Branch Probability Analysis +; GCN-O3-NEXT: Lazy Block Frequency Analysis +; GCN-O3-NEXT: Expand memcmp() to load/stores +; GCN-O3-NEXT: Lower Garbage Collection Instructions +; GCN-O3-NEXT: Shadow Stack GC Lowering +; GCN-O3-NEXT: Lower constant intrinsics +; GCN-O3-NEXT: Remove unreachable blocks from the CFG +; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: Post-Dominator Tree Construction +; GCN-O3-NEXT: Branch Probability Analysis +; GCN-O3-NEXT: Block Frequency Analysis +; GCN-O3-NEXT: Constant Hoisting +; GCN-O3-NEXT: Replace intrinsics with calls to vector library +; GCN-O3-NEXT: Partially inline calls to library functions +; GCN-O3-NEXT: Scalarize Masked Memory Intrinsics +; GCN-O3-NEXT: Expand reduction intrinsics +; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: Phi Values Analysis +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Memory Dependence Analysis +; GCN-O3-NEXT: Lazy Branch Probability Analysis +; GCN-O3-NEXT: Lazy Block Frequency Analysis +; GCN-O3-NEXT: Optimization Remark Emitter +; GCN-O3-NEXT: Global Value Numbering +; GCN-O3-NEXT: CallGraph Construction +; GCN-O3-NEXT: Call Graph SCC Pass Manager +; GCN-O3-NEXT: AMDGPU Annotate Kernel Features +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: AMDGPU Lower Kernel Arguments +; GCN-O3-NEXT: Analysis if a function is memory bound +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: CodeGen Prepare +; GCN-O3-NEXT: Rewrite Symbols +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: Scalar Evolution Analysis +; GCN-O3-NEXT: GPU Load and Store Vectorizer +; GCN-O3-NEXT: Lazy Value Information Analysis +; GCN-O3-NEXT: Lower SwitchInst's to branches +; GCN-O3-NEXT: Lower invoke and unwind, for unwindless code generators +; GCN-O3-NEXT: Remove unreachable blocks from the CFG +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Flatten the CFG +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Post-Dominator Tree Construction +; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: Legacy Divergence Analysis +; GCN-O3-NEXT: AMDGPU IR late optimizations +; GCN-O3-NEXT: Unify divergent function exit nodes +; GCN-O3-NEXT: Lazy Value Information Analysis +; GCN-O3-NEXT: Lower SwitchInst's to branches +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: Convert irreducible control-flow into natural loops +; GCN-O3-NEXT: Fixup each natural loop to have a single exit block +; GCN-O3-NEXT: Post-Dominator Tree Construction +; GCN-O3-NEXT: Dominance Frontier Construction +; GCN-O3-NEXT: Detect single entry single exit regions +; GCN-O3-NEXT: Region Pass Manager +; GCN-O3-NEXT: Structurize control flow +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: Code sinking +; GCN-O3-NEXT: Post-Dominator Tree Construction +; GCN-O3-NEXT: Legacy Divergence Analysis +; GCN-O3-NEXT: Phi Values Analysis +; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Memory Dependence Analysis +; GCN-O3-NEXT: AMDGPU Annotate Uniform Values +; GCN-O3-NEXT: SI annotate control flow +; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: LCSSA Verifier +; GCN-O3-NEXT: Loop-Closed SSA Form Pass +; GCN-O3-NEXT: CallGraph Construction +; GCN-O3-NEXT: Call Graph SCC Pass Manager +; GCN-O3-NEXT: DummyCGSCCPass +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Safe Stack instrumentation pass +; GCN-O3-NEXT: Insert stack protectors +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Post-Dominator Tree Construction +; GCN-O3-NEXT: Natural Loop Information +; GCN-O3-NEXT: Legacy Divergence Analysis +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Branch Probability Analysis +; GCN-O3-NEXT: Lazy Branch Probability Analysis +; GCN-O3-NEXT: Lazy Block Frequency Analysis +; GCN-O3-NEXT: AMDGPU DAG->DAG Pattern Instruction Selection +; GCN-O3-NEXT: MachineDominator Tree Construction +; GCN-O3-NEXT: SI Fix SGPR copies +; GCN-O3-NEXT: MachinePostDominator Tree Construction +; GCN-O3-NEXT: SI Lower i1 Copies +; GCN-O3-NEXT: Finalize ISel and expand pseudo-instructions +; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O3-NEXT: Early Tail Duplication +; GCN-O3-NEXT: Optimize machine instruction PHIs +; GCN-O3-NEXT: Slot index numbering +; GCN-O3-NEXT: Merge disjoint stack slots +; GCN-O3-NEXT: Local Stack Slot Allocation +; GCN-O3-NEXT: Remove dead machine instructions +; GCN-O3-NEXT: MachineDominator Tree Construction +; GCN-O3-NEXT: Machine Natural Loop Construction +; GCN-O3-NEXT: Machine Block Frequency Analysis +; GCN-O3-NEXT: Early Machine Loop Invariant Code Motion +; GCN-O3-NEXT: MachineDominator Tree Construction +; GCN-O3-NEXT: Machine Block Frequency Analysis +; GCN-O3-NEXT: Machine Common Subexpression Elimination +; GCN-O3-NEXT: MachinePostDominator Tree Construction +; GCN-O3-NEXT: Machine code sinking +; GCN-O3-NEXT: Peephole Optimizations +; GCN-O3-NEXT: Remove dead machine instructions +; GCN-O3-NEXT: SI Fold Operands +; GCN-O3-NEXT: GCN DPP Combine +; GCN-O3-NEXT: SI Load Store Optimizer +; GCN-O3-NEXT: SI Peephole SDWA +; GCN-O3-NEXT: Machine Block Frequency Analysis +; GCN-O3-NEXT: MachineDominator Tree Construction +; GCN-O3-NEXT: Early Machine Loop Invariant Code Motion +; GCN-O3-NEXT: MachineDominator Tree Construction +; GCN-O3-NEXT: Machine Block Frequency Analysis +; GCN-O3-NEXT: Machine Common Subexpression Elimination +; GCN-O3-NEXT: SI Fold Operands +; GCN-O3-NEXT: Remove dead machine instructions +; GCN-O3-NEXT: SI Shrink Instructions +; GCN-O3-NEXT: Register Usage Information Propagation +; GCN-O3-NEXT: Detect Dead Lanes +; GCN-O3-NEXT: Remove dead machine instructions +; GCN-O3-NEXT: Process Implicit Definitions +; GCN-O3-NEXT: Remove unreachable machine basic blocks +; GCN-O3-NEXT: Live Variable Analysis +; GCN-O3-NEXT: Eliminate PHI nodes for register allocation +; GCN-O3-NEXT: SI Lower control flow pseudo instructions +; GCN-O3-NEXT: Two-Address instruction pass +; GCN-O3-NEXT: MachineDominator Tree Construction +; GCN-O3-NEXT: Slot index numbering +; GCN-O3-NEXT: Live Interval Analysis +; GCN-O3-NEXT: Machine Natural Loop Construction +; GCN-O3-NEXT: Simple Register Coalescing +; GCN-O3-NEXT: Rename Disconnected Subregister Components +; GCN-O3-NEXT: Machine Instruction Scheduler +; GCN-O3-NEXT: MachinePostDominator Tree Construction +; GCN-O3-NEXT: SI Whole Quad Mode +; GCN-O3-NEXT: Virtual Register Map +; GCN-O3-NEXT: Live Register Matrix +; GCN-O3-NEXT: SI Pre-allocate WWM Registers +; GCN-O3-NEXT: SI optimize exec mask operations pre-RA +; GCN-O3-NEXT: SI Form memory clauses +; GCN-O3-NEXT: Machine Natural Loop Construction +; GCN-O3-NEXT: Machine Block Frequency Analysis +; GCN-O3-NEXT: Debug Variable Analysis +; GCN-O3-NEXT: Live Stack Slot Analysis +; GCN-O3-NEXT: Virtual Register Map +; GCN-O3-NEXT: Live Register Matrix +; GCN-O3-NEXT: Bundle Machine CFG Edges +; GCN-O3-NEXT: Spill Code Placement Analysis +; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O3-NEXT: Machine Optimization Remark Emitter +; GCN-O3-NEXT: Greedy Register Allocator +; GCN-O3-NEXT: GCN NSA Reassign +; GCN-O3-NEXT: Virtual Register Rewriter +; GCN-O3-NEXT: Stack Slot Coloring +; GCN-O3-NEXT: Machine Copy Propagation Pass +; GCN-O3-NEXT: Machine Loop Invariant Code Motion +; GCN-O3-NEXT: SI Fix VGPR copies +; GCN-O3-NEXT: SI optimize exec mask operations +; GCN-O3-NEXT: SI lower SGPR spill instructions +; GCN-O3-NEXT: Fixup Statepoint Caller Saved +; GCN-O3-NEXT: PostRA Machine Sink +; GCN-O3-NEXT: MachineDominator Tree Construction +; GCN-O3-NEXT: Machine Natural Loop Construction +; GCN-O3-NEXT: Machine Block Frequency Analysis +; GCN-O3-NEXT: MachinePostDominator Tree Construction +; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O3-NEXT: Machine Optimization Remark Emitter +; GCN-O3-NEXT: Shrink Wrapping analysis +; GCN-O3-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; GCN-O3-NEXT: Control Flow Optimizer +; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O3-NEXT: Tail Duplication +; GCN-O3-NEXT: Machine Copy Propagation Pass +; GCN-O3-NEXT: Post-RA pseudo instruction expansion pass +; GCN-O3-NEXT: SI post-RA bundler +; GCN-O3-NEXT: MachineDominator Tree Construction +; GCN-O3-NEXT: Machine Natural Loop Construction +; GCN-O3-NEXT: Post RA top-down list latency scheduler +; GCN-O3-NEXT: Machine Block Frequency Analysis +; GCN-O3-NEXT: MachinePostDominator Tree Construction +; GCN-O3-NEXT: Branch Probability Basic Block Placement +; GCN-O3-NEXT: Insert fentry calls +; GCN-O3-NEXT: Insert XRay ops +; GCN-O3-NEXT: SI Memory Legalizer +; GCN-O3-NEXT: MachinePostDominator Tree Construction +; GCN-O3-NEXT: SI insert wait instructions +; GCN-O3-NEXT: SI Shrink Instructions +; GCN-O3-NEXT: Insert required mode register values +; GCN-O3-NEXT: SI Insert Hard Clauses +; GCN-O3-NEXT: MachineDominator Tree Construction +; GCN-O3-NEXT: SI Final Branch Preparation +; GCN-O3-NEXT: SI peephole optimizations +; GCN-O3-NEXT: Post RA hazard recognizer +; GCN-O3-NEXT: Branch relaxation pass +; GCN-O3-NEXT: Register Usage Information Collector Pass +; GCN-O3-NEXT: Live DEBUG_VALUE analysis +; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis +; GCN-O3-NEXT: Machine Optimization Remark Emitter +; GCN-O3-NEXT: AMDGPU Assembly Printer +; GCN-O3-NEXT: Free MachineFunction +; GCN-O3-NEXT: Pass Arguments: -domtree +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Dominator Tree Construction + +define void @empty() { + ret void +}