diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -774,6 +774,7 @@
       if (NewI->getParent() == nullptr) {
         NewI->insertBefore(I);
         NewI->takeName(I);
+        NewI->setDebugLoc(I->getDebugLoc());
       }
     }
     return NewV;
diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll
--- a/llvm/test/DebugInfo/NVPTX/debug-info.ll
+++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-nvidia-cuda | %ptxas-verify %}
 
-; // Bitcode int this test case is reduced version of compiled code below:
+; // Bitcode in this test case is reduced version of compiled code below:
 ;__device__ inline void res(float x, float y, float *res) { *res = x + y; }
 ;
 ;__global__ void saxpy(int n, float a, float *x, float *y) {
@@ -42,12 +42,12 @@
 ; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}};
 ; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}];
 ; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}};
+; CHECK: .loc [[DEBUG_INFO_CU]] 8 13
 ; CHECK: mul.wide.u32 %rd{{.+}}, %r{{.+}}, 4;
 ; CHECK: add.s64 %rd{{.+}}, %rd{{.+}}, %rd{{.+}};
-; CHECK: .loc [[DEBUG_INFO_CU]] 8 13
 ; CHECK: ld.global.f32 %f{{.+}}, [%rd{{.+}}];
-; CHECK: add.s64 %rd{{.+}}, %rd{{.+}}, %rd{{.+}};
 ; CHECK: .loc [[DEBUG_INFO_CU]] 8 19
+; CHECK: add.s64 %rd{{.+}}, %rd{{.+}}, %rd{{.+}};
 ; CHECK: ld.global.f32 %f{{.+}}, [%rd{{.+}}];
 ; CHECK: .loc [[DEBUG_INFO_CU]] 3 82
 ; CHECK: fma.rn.f32 %f{{.+}}, %f{{.+}}, %f{{.+}}, %f{{.+}};
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/debug-info.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/debug-info.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/debug-info.ll
@@ -0,0 +1,94 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+
+; check that the debug locations are correctly propagated
+
+@lds = internal unnamed_addr addrspace(3) global [648 x double] undef, align 8
+
+; CHECK-LABEL: @load_global_from_flat(
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*, !dbg ![[DEBUG_LOC_TMP0:[0-9]+]]
+; CHECK-NEXT: %tmp1 = load float, float addrspace(1)* %tmp0, align 4, !dbg ![[DEBUG_LOC_TMP1:[0-9]+]]
+; CHECK-NEXT: ret float %tmp1, !dbg ![[DEBUG_LOC_RET:[0-9]+]]
+define float @load_global_from_flat(float* %generic_scalar) #0 !dbg !5 {
+  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*, !dbg !8
+  %tmp1 = load float, float addrspace(1)* %tmp0, align 4, !dbg !9
+  ret float %tmp1, !dbg !10
+}
+
+; CHECK-LABEL: @simplified_constexpr_gep_addrspacecast(
+; CHECK: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0, !dbg ![[DEBUG_LOC_GEP0:[0-9]+]]
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8, !dbg ![[DEBUG_LOC_STORE_GEP0:[0-9]+]]
+define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) #0 !dbg !11 {
+  %gep0 = getelementptr inbounds double, double* addrspacecast (double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384) to double*), i64 %idx0, !dbg !12
+  %asc = addrspacecast double* %gep0 to double addrspace(3)*, !dbg !13
+  store double 1.000000e+00, double addrspace(3)* %asc, align 8, !dbg !14
+  ret void, !dbg !15
+}
+
+; CHECK-LABEL: @objectsize_group_to_flat_i32(
+; CHECK: %val = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %group.ptr, i1 true, i1 false, i1 false), !dbg ![[DEBUG_LOC_VAL:[0-9]+]]
+define i32 @objectsize_group_to_flat_i32(i8 addrspace(3)* %group.ptr) #0 !dbg !16 {
+  %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*, !dbg !17
+  %val = call i32 @llvm.objectsize.i32.p0i8(i8* %cast, i1 true, i1 false, i1 false), !dbg !18
+  ret i32 %val, !dbg !19
+}
+
+; CHECK-LABEL: @memset_group_to_flat(
+; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* align 4 %group.ptr, i8 4, i64 32, i1 false), !dbg ![[DEBUG_LOC_MEMSET_CAST:[0-9]+]]
+define amdgpu_kernel void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 !dbg !20 {
+  %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*, !dbg !21
+  call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 false), !dbg !22, !tbaa !23, !alias.scope !26, !noalias !29
+  ret void, !dbg !31
+}
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #1
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2
+
+attributes #0 = { nounwind }
+attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+attributes #2 = { argmemonly nocallback nofree nounwind willreturn writeonly }
+
+!llvm.dbg.cu = !{!0}
+!llvm.debugify = !{!2, !3}
+!llvm.module.flags = !{!4}
+
+; CHECK: ![[DEBUG_LOC_TMP0]] = !DILocation(line: 1, column: 1,
+; CHECK: ![[DEBUG_LOC_TMP1]] = !DILocation(line: 2, column: 1,
+; CHECK: ![[DEBUG_LOC_RET]] = !DILocation(line: 3, column: 1,
+; CHECK: ![[DEBUG_LOC_GEP0]] = !DILocation(line: 4, column: 1,
+; CHECK: ![[DEBUG_LOC_STORE_GEP0]] = !DILocation(line: 6, column: 1,
+; CHECK: ![[DEBUG_LOC_VAL]] = !DILocation(line: 9, column: 1,
+; CHECK: ![[DEBUG_LOC_MEMSET_CAST]] = !DILocation(line: 12, column: 1,
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "debug_info.pre.ll", directory: "/")
+!2 = !{i32 13}
+!3 = !{i32 0}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "load_global_from_flat", linkageName: "load_global_from_flat", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !7)
+!6 = !DISubroutineType(types: !7)
+!7 = !{}
+!8 = !DILocation(line: 1, column: 1, scope: !5)
+!9 = !DILocation(line: 2, column: 1, scope: !5)
+!10 = !DILocation(line: 3, column: 1, scope: !5)
+!11 = distinct !DISubprogram(name: "simplified_constexpr_gep_addrspacecast", linkageName: "simplified_constexpr_gep_addrspacecast", scope: null, file: !1, line: 4, type: !6, scopeLine: 4, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !7)
+!12 = !DILocation(line: 4, column: 1, scope: !11)
+!13 = !DILocation(line: 5, column: 1, scope: !11)
+!14 = !DILocation(line: 6, column: 1, scope: !11)
+!15 = !DILocation(line: 7, column: 1, scope: !11)
+!16 = distinct !DISubprogram(name: "objectsize_group_to_flat_i32", linkageName: "objectsize_group_to_flat_i32", scope: null, file: !1, line: 8, type: !6, scopeLine: 8, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !7)
+!17 = !DILocation(line: 8, column: 1, scope: !16)
+!18 = !DILocation(line: 9, column: 1, scope: !16)
+!19 = !DILocation(line: 10, column: 1, scope: !16)
+!20 = distinct !DISubprogram(name: "memset_group_to_flat", linkageName: "memset_group_to_flat", scope: null, file: !1, line: 11, type: !6, scopeLine: 11, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !7)
+!21 = !DILocation(line: 11, column: 1, scope: !20)
+!22 = !DILocation(line: 12, column: 1, scope: !20)
+!23 = !{!24, !24, i64 0}
+!24 = !{!"A", !25}
+!25 = !{!"tbaa root"}
+!26 = !{!27}
+!27 = distinct !{!27, !28, !"some scope 1"}
+!28 = distinct !{!28, !"some domain"}
+!29 = !{!30}
+!30 = distinct !{!30, !28, !"some scope 2"}
+!31 = !DILocation(line: 13, column: 1, scope: !20)