Skip to content

Commit add05a8

Browse files
committed Sep 1, 2016
AMDGPU: Add runtime metadata for pointee alignment of argument.
Add runtime metadata for the pointee alignment of pointer-type kernel arguments. The key is KeyArgPointeeAlign and the value is a 32-bit unsigned integer. Differential Revision: https://reviews.llvm.org/D24145 llvm-svn: 280399
1 parent d2fb1e4 commit add05a8

File tree

3 files changed

+175
-1
lines changed

3 files changed

+175
-1
lines changed
 

‎llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -886,13 +886,19 @@ void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) {
886886
unsigned I = Arg.getArgNo();
887887
OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1);
888888

889-
// Emit KeyArgSize and KeyArgAlign.
889+
// Emit KeyArgSize, KeyArgAlign and KeyArgPointeeAlign.
890890
Type *T = Arg.getType();
891891
const DataLayout &DL = F.getParent()->getDataLayout();
892892
emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgSize,
893893
DL.getTypeAllocSize(T), 4);
894894
emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgAlign,
895895
DL.getABITypeAlignment(T), 4);
896+
if (auto PT = dyn_cast<PointerType>(T)) {
897+
auto ET = PT->getElementType();
898+
if (ET->isSized())
899+
emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgPointeeAlign,
900+
DL.getABITypeAlignment(ET), 4);
901+
}
896902

897903
// Emit KeyArgTypeName.
898904
auto TypeName = dyn_cast<MDString>(F.getMetadata(

‎llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h

+1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ namespace RuntimeMD {
8484
KeyFlatWorkGroupSizeLimits = 29, // Flat work group size limits
8585
KeyMaxWorkGroupSize = 30, // Maximum work group size
8686
KeyNoPartialWorkGroups = 31, // No partial work groups
87+
KeyArgPointeeAlign = 32, // Alignment of pointee type
8788
};
8889

8990
enum Language : uint8_t {

‎llvm/test/CodeGen/AMDGPU/runtime-metadata.ll

+167
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,8 @@ define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_spac
217217
; CHECK-NEXT: .long 8
218218
; CHECK-NEXT: .byte 10
219219
; CHECK-NEXT: .long 8
220+
; CHECK-NEXT: .byte 32
221+
; CHECK-NEXT: .long 4
220222
; CHECK-NEXT: .byte 11
221223
; CHECK-NEXT: .long 5
222224
; CHECK-NEXT: .ascii "int *"
@@ -331,6 +333,8 @@ define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_
331333
; CHECK-NEXT: .long 4
332334
; CHECK-NEXT: .byte 10
333335
; CHECK-NEXT: .long 4
336+
; CHECK-NEXT: .byte 32
337+
; CHECK-NEXT: .long 4
334338
; CHECK-NEXT: .byte 11
335339
; CHECK-NEXT: .long 8
336340
; CHECK-NEXT: .ascii "struct A"
@@ -444,6 +448,8 @@ define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !ke
444448
; CHECK-NEXT: .long 8
445449
; CHECK-NEXT: .byte 10
446450
; CHECK-NEXT: .long 8
451+
; CHECK-NEXT: .byte 32
452+
; CHECK-NEXT: .long 4
447453
; CHECK-NEXT: .byte 11
448454
; CHECK-NEXT: .long 5
449455
; CHECK-NEXT: .ascii "int *"
@@ -461,6 +467,8 @@ define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !ke
461467
; CHECK-NEXT: .long 8
462468
; CHECK-NEXT: .byte 10
463469
; CHECK-NEXT: .long 8
470+
; CHECK-NEXT: .byte 32
471+
; CHECK-NEXT: .long 4
464472
; CHECK-NEXT: .byte 11
465473
; CHECK-NEXT: .long 5
466474
; CHECK-NEXT: .ascii "int *"
@@ -478,6 +486,8 @@ define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !ke
478486
; CHECK-NEXT: .long 4
479487
; CHECK-NEXT: .byte 10
480488
; CHECK-NEXT: .long 4
489+
; CHECK-NEXT: .byte 32
490+
; CHECK-NEXT: .long 4
481491
; CHECK-NEXT: .byte 11
482492
; CHECK-NEXT: .long 5
483493
; CHECK-NEXT: .ascii "int *"
@@ -507,6 +517,8 @@ define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2
507517
; CHECK-NEXT: .long 8
508518
; CHECK-NEXT: .byte 10
509519
; CHECK-NEXT: .long 8
520+
; CHECK-NEXT: .byte 32
521+
; CHECK-NEXT: .long 4
510522
; CHECK-NEXT: .byte 11
511523
; CHECK-NEXT: .long 5
512524
; CHECK-NEXT: .ascii "int *"
@@ -525,6 +537,8 @@ define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2
525537
; CHECK-NEXT: .long 8
526538
; CHECK-NEXT: .byte 10
527539
; CHECK-NEXT: .long 8
540+
; CHECK-NEXT: .byte 32
541+
; CHECK-NEXT: .long 4
528542
; CHECK-NEXT: .byte 11
529543
; CHECK-NEXT: .long 5
530544
; CHECK-NEXT: .ascii "int *"
@@ -915,6 +929,8 @@ define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_
915929
; CHECK-NEXT: .long 8
916930
; CHECK-NEXT: .byte 10
917931
; CHECK-NEXT: .long 8
932+
; CHECK-NEXT: .byte 32
933+
; CHECK-NEXT: .long 4
918934
; CHECK-NEXT: .byte 11
919935
; CHECK-NEXT: .long 6
920936
; CHECK-NEXT: .ascii "int **"
@@ -944,6 +960,8 @@ define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_a
944960
; CHECK-NEXT: .long 4
945961
; CHECK-NEXT: .byte 10
946962
; CHECK-NEXT: .long 4
963+
; CHECK-NEXT: .byte 32
964+
; CHECK-NEXT: .long 8
947965
; CHECK-NEXT: .byte 11
948966
; CHECK-NEXT: .long 8
949967
; CHECK-NEXT: .ascii "struct B"
@@ -1018,6 +1036,151 @@ define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t add
10181036
ret void
10191037
}
10201038

1039+
; CHECK-LABEL:{{^}}test_pointee_align:
1040+
; CHECK: .section .AMDGPU.runtime_metadata
1041+
; CHECK-NEXT: .byte 4
1042+
; CHECK-NEXT: .byte 6
1043+
; CHECK-NEXT: .long 18
1044+
; CHECK-NEXT: .ascii "test_pointee_align"
1045+
; CHECK-NEXT: .byte 7
1046+
; CHECK-NEXT: .byte 9
1047+
; CHECK-NEXT: .long 8
1048+
; CHECK-NEXT: .byte 10
1049+
; CHECK-NEXT: .long 8
1050+
; CHECK-NEXT: .byte 32
1051+
; CHECK-NEXT: .long 8
1052+
; CHECK-NEXT: .byte 11
1053+
; CHECK-NEXT: .long 6
1054+
; CHECK-NEXT: .ascii "long *"
1055+
; CHECK-NEXT: .byte 13
1056+
; CHECK-NEXT: .byte 1
1057+
; CHECK-NEXT: .byte 14
1058+
; CHECK-NEXT: .short 9
1059+
; CHECK-NEXT: .byte 16
1060+
; CHECK-NEXT: .byte 0
1061+
; CHECK-NEXT: .byte 15
1062+
; CHECK-NEXT: .byte 1
1063+
; CHECK-NEXT: .byte 8
1064+
; CHECK-NEXT: .byte 7
1065+
; CHECK-NEXT: .byte 9
1066+
; CHECK-NEXT: .long 4
1067+
; CHECK-NEXT: .byte 10
1068+
; CHECK-NEXT: .long 4
1069+
; CHECK-NEXT: .byte 32
1070+
; CHECK-NEXT: .long 1
1071+
; CHECK-NEXT: .byte 11
1072+
; CHECK-NEXT: .long 6
1073+
; CHECK-NEXT: .ascii "char *"
1074+
; CHECK-NEXT: .byte 13
1075+
; CHECK-NEXT: .byte 1
1076+
; CHECK-NEXT: .byte 14
1077+
; CHECK-NEXT: .short 1
1078+
; CHECK-NEXT: .byte 16
1079+
; CHECK-NEXT: .byte 0
1080+
; CHECK-NEXT: .byte 15
1081+
; CHECK-NEXT: .byte 3
1082+
; CHECK-NEXT: .byte 8
1083+
; CHECK-NEXT: .byte 7
1084+
; CHECK-NEXT: .byte 9
1085+
; CHECK-NEXT: .long 4
1086+
; CHECK-NEXT: .byte 10
1087+
; CHECK-NEXT: .long 4
1088+
; CHECK-NEXT: .byte 32
1089+
; CHECK-NEXT: .long 2
1090+
; CHECK-NEXT: .byte 11
1091+
; CHECK-NEXT: .long 7
1092+
; CHECK-NEXT: .ascii "char2 *"
1093+
; CHECK-NEXT: .byte 13
1094+
; CHECK-NEXT: .byte 1
1095+
; CHECK-NEXT: .byte 14
1096+
; CHECK-NEXT: .short 1
1097+
; CHECK-NEXT: .byte 16
1098+
; CHECK-NEXT: .byte 0
1099+
; CHECK-NEXT: .byte 15
1100+
; CHECK-NEXT: .byte 3
1101+
; CHECK-NEXT: .byte 8
1102+
; CHECK-NEXT: .byte 7
1103+
; CHECK-NEXT: .byte 9
1104+
; CHECK-NEXT: .long 4
1105+
; CHECK-NEXT: .byte 10
1106+
; CHECK-NEXT: .long 4
1107+
; CHECK-NEXT: .byte 32
1108+
; CHECK-NEXT: .long 4
1109+
; CHECK-NEXT: .byte 11
1110+
; CHECK-NEXT: .long 7
1111+
; CHECK-NEXT: .ascii "char3 *"
1112+
; CHECK-NEXT: .byte 13
1113+
; CHECK-NEXT: .byte 1
1114+
; CHECK-NEXT: .byte 14
1115+
; CHECK-NEXT: .short 1
1116+
; CHECK-NEXT: .byte 16
1117+
; CHECK-NEXT: .byte 0
1118+
; CHECK-NEXT: .byte 15
1119+
; CHECK-NEXT: .byte 3
1120+
; CHECK-NEXT: .byte 8
1121+
; CHECK-NEXT: .byte 7
1122+
; CHECK-NEXT: .byte 9
1123+
; CHECK-NEXT: .long 4
1124+
; CHECK-NEXT: .byte 10
1125+
; CHECK-NEXT: .long 4
1126+
; CHECK-NEXT: .byte 32
1127+
; CHECK-NEXT: .long 4
1128+
; CHECK-NEXT: .byte 11
1129+
; CHECK-NEXT: .long 7
1130+
; CHECK-NEXT: .ascii "char4 *"
1131+
; CHECK-NEXT: .byte 13
1132+
; CHECK-NEXT: .byte 1
1133+
; CHECK-NEXT: .byte 14
1134+
; CHECK-NEXT: .short 1
1135+
; CHECK-NEXT: .byte 16
1136+
; CHECK-NEXT: .byte 0
1137+
; CHECK-NEXT: .byte 15
1138+
; CHECK-NEXT: .byte 3
1139+
; CHECK-NEXT: .byte 8
1140+
; CHECK-NEXT: .byte 7
1141+
; CHECK-NEXT: .byte 9
1142+
; CHECK-NEXT: .long 4
1143+
; CHECK-NEXT: .byte 10
1144+
; CHECK-NEXT: .long 4
1145+
; CHECK-NEXT: .byte 32
1146+
; CHECK-NEXT: .long 8
1147+
; CHECK-NEXT: .byte 11
1148+
; CHECK-NEXT: .long 7
1149+
; CHECK-NEXT: .ascii "char8 *"
1150+
; CHECK-NEXT: .byte 13
1151+
; CHECK-NEXT: .byte 1
1152+
; CHECK-NEXT: .byte 14
1153+
; CHECK-NEXT: .short 1
1154+
; CHECK-NEXT: .byte 16
1155+
; CHECK-NEXT: .byte 0
1156+
; CHECK-NEXT: .byte 15
1157+
; CHECK-NEXT: .byte 3
1158+
; CHECK-NEXT: .byte 8
1159+
; CHECK-NEXT: .byte 7
1160+
; CHECK-NEXT: .byte 9
1161+
; CHECK-NEXT: .long 4
1162+
; CHECK-NEXT: .byte 10
1163+
; CHECK-NEXT: .long 4
1164+
; CHECK-NEXT: .byte 32
1165+
; CHECK-NEXT: .long 16
1166+
; CHECK-NEXT: .byte 11
1167+
; CHECK-NEXT: .long 8
1168+
; CHECK-NEXT: .ascii "char16 *"
1169+
; CHECK-NEXT: .byte 13
1170+
; CHECK-NEXT: .byte 1
1171+
; CHECK-NEXT: .byte 14
1172+
; CHECK-NEXT: .short 1
1173+
; CHECK-NEXT: .byte 16
1174+
; CHECK-NEXT: .byte 0
1175+
; CHECK-NEXT: .byte 15
1176+
; CHECK-NEXT: .byte 3
1177+
; CHECK-NEXT: .byte 8
1178+
; CHECK-NEXT: .byte 5
1179+
1180+
define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, i8 addrspace(3)* %b, <2 x i8> addrspace(3)* %c, <3 x i8> addrspace(3)* %d, <4 x i8> addrspace(3)* %e, <8 x i8> addrspace(3)* %f, <16 x i8> addrspace(3)* %g) !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
1181+
ret void
1182+
}
1183+
10211184
!1 = !{i32 0}
10221185
!2 = !{!"none"}
10231186
!3 = !{!"int"}
@@ -1063,3 +1226,7 @@ define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t add
10631226
!84 = !{!"clk_event_t"}
10641227
!opencl.ocl.version = !{!90}
10651228
!90 = !{i32 2, i32 0}
1229+
!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3}
1230+
!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"}
1231+
!93 = !{!"long *", !"char *", !"char2 *", !"char3 *", !"char4 *", !"char8 *", !"char16 *"}
1232+
!94 = !{!"", !"", !"", !"", !"", !"", !""}

0 commit comments

Comments
 (0)
Please sign in to comment.