Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h @@ -68,6 +68,7 @@ void emitKernelArg(const Argument &Arg); void emitKernelArg(const DataLayout &DL, Type *Ty, ValueKind ValueKind, + unsigned PointeeAlign = 0, StringRef Name = "", StringRef TypeName = "", StringRef BaseTypeName = "", StringRef AccQual = "", StringRef TypeQual = ""); Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp @@ -324,13 +324,28 @@ if (Node && ArgNo < Node->getNumOperands()) TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); - emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), - getValueKind(Arg.getType(), TypeQual, BaseTypeName), Name, - TypeName, BaseTypeName, AccQual, TypeQual); + Type *Ty = Arg.getType(); + const DataLayout &DL = Func->getParent()->getDataLayout(); + + unsigned PointeeAlign = 0; + if (auto PtrTy = dyn_cast<PointerType>(Ty)) { + if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) { + PointeeAlign = Arg.getParamAlignment(); + // No explicit align attribute: fall back to the ABI alignment, but only + // for sized pointees, matching the guard the replaced code applied. + if (PointeeAlign == 0 && PtrTy->getElementType()->isSized()) + PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType()); + } + } + + emitKernelArg(DL, Ty, getValueKind(Ty, TypeQual, BaseTypeName), + PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual); } void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, - ValueKind ValueKind, StringRef Name, + ValueKind ValueKind, + unsigned PointeeAlign, + StringRef Name, StringRef TypeName, StringRef BaseTypeName, StringRef AccQual, StringRef TypeQual) { HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); @@ -342,12 +357,7 @@ Arg.mAlign = 
DL.getABITypeAlignment(Ty); Arg.mValueKind = ValueKind; Arg.mValueType = getValueType(Ty, BaseTypeName); - - if (auto PtrTy = dyn_cast<PointerType>(Ty)) { - auto ElTy = PtrTy->getElementType(); - if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS && ElTy->isSized()) - Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy); - } + Arg.mPointeeAlign = PointeeAlign; if (auto PtrTy = dyn_cast<PointerType>(Ty)) Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace()); Index: test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll +++ test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll @@ -1248,6 +1248,13 @@ ; CHECK-NEXT: PointeeAlign: 16 ; CHECK-NEXT: AddrSpaceQual: Local ; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - Name: h +; CHECK-NEXT: Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: ValueKind: DynamicSharedPointer +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: PointeeAlign: 1 +; CHECK-NEXT: AddrSpaceQual: Local ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -1271,12 +1278,118 @@ <3 x i8> addrspace(3)* %d, <4 x i8> addrspace(3)* %e, <8 x i8> addrspace(3)* %f, - <16 x i8> addrspace(3)* %g) + <16 x i8> addrspace(3)* %g, + {} addrspace(3)* %h) !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 !kernel_arg_base_type !93 !kernel_arg_type_qual !94 { ret void } +; CHECK: - Name: test_pointee_align_attribute +; CHECK-NEXT: SymbolName: 'test_pointee_align_attribute@kd' +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Args: +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: 'long addrspace(5)*' +; CHECK-NEXT: Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: GlobalBuffer +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - Name: b +; CHECK-NEXT: TypeName: 'char addrspace(5)*' +; 
CHECK-NEXT: Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: ValueKind: DynamicSharedPointer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: PointeeAlign: 8 +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - Name: c +; CHECK-NEXT: TypeName: 'char2 addrspace(5)*' +; CHECK-NEXT: Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: ValueKind: DynamicSharedPointer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: PointeeAlign: 32 +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - Name: d +; CHECK-NEXT: TypeName: 'char3 addrspace(5)*' +; CHECK-NEXT: Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: ValueKind: DynamicSharedPointer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: PointeeAlign: 64 +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - Name: e +; CHECK-NEXT: TypeName: 'char4 addrspace(5)*' +; CHECK-NEXT: Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: ValueKind: DynamicSharedPointer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: PointeeAlign: 256 +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - Name: f +; CHECK-NEXT: TypeName: 'char8 addrspace(5)*' +; CHECK-NEXT: Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: ValueKind: DynamicSharedPointer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: PointeeAlign: 128 +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - Name: g +; CHECK-NEXT: TypeName: 'char16 addrspace(5)*' +; CHECK-NEXT: Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: ValueKind: DynamicSharedPointer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: PointeeAlign: 1024 +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - Name: h +; CHECK-NEXT: Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: ValueKind: DynamicSharedPointer +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: PointeeAlign: 16 +; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: 
HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @test_pointee_align_attribute(i64 addrspace(1)* align 16 %a, + i8 addrspace(3)* align 8 %b, + <2 x i8> addrspace(3)* align 32 %c, + <3 x i8> addrspace(3)* align 64 %d, + <4 x i8> addrspace(3)* align 256 %e, + <8 x i8> addrspace(3)* align 128 %f, + <16 x i8> addrspace(3)* align 1024 %g, + {} addrspace(3)* align 16 %h) + !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 + !kernel_arg_base_type !93 !kernel_arg_type_qual !94 { + ret void +} + + ; CHECK: - Name: __test_block_invoke_kernel ; CHECK-NEXT: SymbolName: '__test_block_invoke_kernel@kd' ; CHECK-NEXT: Language: OpenCL C