This patch adds support for printing analysis messages relating to data globalization on the GPU. Globalization occurs when data is shared between threads in a GPU context and must be pushed out of thread-private storage into global or shared memory.
Diff Detail
- Repository: rG LLVM Github Monorepo
Event Timeline
I tested it using the example file to see that it works. I tried calling opt on it directly, but it gave an error, so I figured I'd get the review set up first.
clang++ -fopenmp clang/test/OpenMP/declare_target_codegen_globalization.cpp -Rpass=openmp-opt -Rpass-analysis=openmp-opt -S -emit-llvm -fopenmp-targets=nvptx64-nvidia-cuda -O3
llvm/lib/Transforms/IPO/OpenMPOpt.cpp:708
    Can't we do foreachUse?
I think the problem is calling opt on the file, since the output is combined with the nvptx IR. Is there an opt command-line option that handles that correctly?
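For what it's worth, a sketch of running opt directly on just the device-side IR; 'device.ll' is a placeholder name for the extracted nvptx64 module, and the -passes spelling follows the new pass manager, so it may differ on older builds:

```shell
# Run only the OpenMP optimizer over the (hypothetical) extracted device IR
# and ask for its analysis remarks. Guarded so the snippet is a no-op when
# opt or the file is missing.
if command -v opt >/dev/null 2>&1 && [ -f device.ll ]; then
  opt -passes=openmp-opt -pass-remarks-analysis=openmp-opt \
      -disable-output device.ll
else
  echo "opt or device.ll not available; skipping"
fi
```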
Added a test case and changed the analysis to use foreachUse. The nvptx file doesn't have line number information after compiling with debug symbols, so the remark just says "unknown." When you get the remarks from clang, it seems to print the same remark more times than necessary.
declare_target_codegen_globalization.cpp:7:5: remark: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. [-Rpass-analysis=openmp-opt]
int bar() {
    ^
remark: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. [-Rpass-analysis=openmp-opt]
remark: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. [-Rpass-analysis=openmp-opt]
Manually added the missing debug info for the stack-push calls, since Clang does not currently generate it. A real solution will require modifying Clang's code generation.
LGTM
llvm/test/Transforms/OpenMP/globalization_remarks.ll:64
    I think you want !31, but I guess it doesn't really matter.