Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -886,13 +886,19 @@ unsigned I = Arg.getArgNo(); OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1); - // Emit KeyArgSize and KeyArgAlign. + // Emit KeyArgSize, KeyArgAlign and KeyArgPointeeAlign. Type *T = Arg.getType(); const DataLayout &DL = F.getParent()->getDataLayout(); emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgSize, DL.getTypeAllocSize(T), 4); emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgAlign, DL.getABITypeAlignment(T), 4); + if (auto PT = dyn_cast<PointerType>(T)) { + auto ET = PT->getElementType(); + if (ET->isSized()) + emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgPointeeAlign, + DL.getABITypeAlignment(ET), 4); + } // Emit KeyArgTypeName. auto TypeName = dyn_cast<MDString>(F.getMetadata( Index: llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h @@ -84,6 +84,7 @@ KeyFlatWorkGroupSizeLimits = 29, // Flat work group size limits KeyMaxWorkGroupSize = 30, // Maximum work group size KeyNoPartialWorkGroups = 31, // No partial work groups + KeyArgPointeeAlign = 32, // Alignment of pointee type }; enum Language : uint8_t { Index: llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll +++ llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll @@ -217,6 +217,8 @@ ; CHECK-NEXT: .long 8 ; CHECK-NEXT: .byte 10 ; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 4 ; CHECK-NEXT: .byte 11 ; CHECK-NEXT: .long 5 ; CHECK-NEXT: .ascii "int *" @@ -331,6 +333,8 @@ ; CHECK-NEXT: .long 4 ; CHECK-NEXT: .byte 10 ; CHECK-NEXT: .long 4 +; 
CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 4 ; CHECK-NEXT: .byte 11 ; CHECK-NEXT: .long 8 ; CHECK-NEXT: .ascii "struct A" @@ -444,6 +448,8 @@ ; CHECK-NEXT: .long 8 ; CHECK-NEXT: .byte 10 ; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 4 ; CHECK-NEXT: .byte 11 ; CHECK-NEXT: .long 5 ; CHECK-NEXT: .ascii "int *" @@ -461,6 +467,8 @@ ; CHECK-NEXT: .long 8 ; CHECK-NEXT: .byte 10 ; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 4 ; CHECK-NEXT: .byte 11 ; CHECK-NEXT: .long 5 ; CHECK-NEXT: .ascii "int *" @@ -478,6 +486,8 @@ ; CHECK-NEXT: .long 4 ; CHECK-NEXT: .byte 10 ; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 4 ; CHECK-NEXT: .byte 11 ; CHECK-NEXT: .long 5 ; CHECK-NEXT: .ascii "int *" @@ -507,6 +517,8 @@ ; CHECK-NEXT: .long 8 ; CHECK-NEXT: .byte 10 ; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 4 ; CHECK-NEXT: .byte 11 ; CHECK-NEXT: .long 5 ; CHECK-NEXT: .ascii "int *" @@ -525,6 +537,8 @@ ; CHECK-NEXT: .long 8 ; CHECK-NEXT: .byte 10 ; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 4 ; CHECK-NEXT: .byte 11 ; CHECK-NEXT: .long 5 ; CHECK-NEXT: .ascii "int *" @@ -915,6 +929,8 @@ ; CHECK-NEXT: .long 8 ; CHECK-NEXT: .byte 10 ; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 4 ; CHECK-NEXT: .byte 11 ; CHECK-NEXT: .long 6 ; CHECK-NEXT: .ascii "int **" @@ -944,6 +960,8 @@ ; CHECK-NEXT: .long 4 ; CHECK-NEXT: .byte 10 ; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 8 ; CHECK-NEXT: .byte 11 ; CHECK-NEXT: .long 8 ; CHECK-NEXT: .ascii "struct B" @@ -1018,6 +1036,151 @@ ret void } +; CHECK-LABEL:{{^}}test_pointee_align: +; CHECK: .section .AMDGPU.runtime_metadata +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .long 18 +; CHECK-NEXT: .ascii "test_pointee_align" +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 
+; CHECK-NEXT: .long 6 +; CHECK-NEXT: .ascii "long *" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 9 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .ascii "char *" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 1 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .ascii "char2 *" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 1 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .ascii "char3 *" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 1 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .ascii "char4 *" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 1 +; CHECK-NEXT: .byte 16 +; 
CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .ascii "char8 *" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 1 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .byte 9 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .byte 11 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .ascii "char16 *" +; CHECK-NEXT: .byte 13 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 14 +; CHECK-NEXT: .short 1 +; CHECK-NEXT: .byte 16 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 15 +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 5 + +define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, i8 addrspace(3)* %b, <2 x i8> addrspace(3)* %c, <3 x i8> addrspace(3)* %d, <4 x i8> addrspace(3)* %e, <8 x i8> addrspace(3)* %f, <16 x i8> addrspace(3)* %g) !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 !kernel_arg_base_type !93 !kernel_arg_type_qual !94 { + ret void +} + !1 = !{i32 0} !2 = !{!"none"} !3 = !{!"int"} @@ -1063,3 +1226,7 @@ !84 = !{!"clk_event_t"} !opencl.ocl.version = !{!90} !90 = !{i32 2, i32 0} +!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3} +!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"} +!93 = !{!"long *", !"char *", !"char2 *", !"char3 *", !"char4 *", !"char8 *", !"char16 *"} +!94 = !{!"", !"", !"", !"", !"", !"", !""}