Add an `align` parameter attribute to pointer arguments of OpenCL kernels.

For a parameter of a declaration carrying OpenCLKernelAttr whose type is a
pointer, the alignment returned by getNaturalPointeeTypeAlignment() is attached
as an `align` attribute. Pointees that are incomplete or not of constant size
are skipped (the new test shows `global void *` receiving no `align`). The
affected CodeGenOpenCL FileCheck expectations are updated, and a new test,
kernel-param-alignment.cl, covers scalar, vector, void and packed-struct
pointees.

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2485,6 +2485,20 @@
       }
     }
 
+    // From OpenCL spec v3.0.10 section 6.3.5 Alignment of Types:
+    // > For arguments to a __kernel function declared to be a pointer to a
+    // > data type, the OpenCL compiler can assume that the pointee is always
+    // > appropriately aligned as required by the data type.
+    if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() &&
+        ParamType->isPointerType()) {
+      QualType PTy = ParamType->getPointeeType();
+      if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
+        llvm::Align Alignment =
+            getNaturalPointeeTypeAlignment(ParamType).getAsAlign();
+        Attrs.addAlignmentAttr(Alignment);
+      }
+    }
+
     switch (FI.getExtParameterInfo(ArgNo).getABI()) {
     case ParameterABI::Ordinary:
       break;
diff --git a/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
--- a/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
@@ -1,6 +1,6 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
-// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly %out)
+// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly align 4 %out)
 // CHECK: store i32 4, i32 addrspace(1)* %out, align 4
 
 kernel void test_kernel(global int *out)
diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
--- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -34,7 +34,7 @@
   out[id] = id;
 }
 
-// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i)
+// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* align 4 %{{.*}}, i32 addrspace(1)* align 4 %b, i32 %i)
 kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // COMMON: %default_queue = alloca %opencl.queue_t*
   queue_t default_queue;
diff --git a/clang/test/CodeGenOpenCL/kernel-param-alignment.cl b/clang/test/CodeGenOpenCL/kernel-param-alignment.cl
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/kernel-param-alignment.cl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s
+
+// Test that pointer arguments to kernels are assumed to be ABI aligned.
+
+struct __attribute__((packed, aligned(1))) packed {
+  int i32;
+};
+
+typedef __attribute__((ext_vector_type(4))) int int4;
+typedef __attribute__((ext_vector_type(2))) float float2;
+
+kernel void test(
+    global int *i32,
+    global long *i64,
+    global int4 *v4i32,
+    global float2 *v2f32,
+    global void *v,
+    global struct packed *p) {
+// CHECK-LABEL: spir_kernel void @test(
+// CHECK-SAME: i32* nocapture noundef align 4 %i32,
+// CHECK-SAME: i64* nocapture noundef align 8 %i64,
+// CHECK-SAME: <4 x i32>* nocapture noundef align 16 %v4i32,
+// CHECK-SAME: <2 x float>* nocapture noundef align 8 %v2f32,
+// CHECK-SAME: i8* nocapture noundef %v,
+// CHECK-SAME: %struct.packed* nocapture noundef align 1 %p)
+}
diff --git a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
--- a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
+++ b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -28,7 +28,7 @@
 // CHECK: spir_kernel
 // AMDGCN: define{{.*}} amdgpu_kernel void @test_single
 // CHECK: struct.int_single* nocapture {{.*}} byval(%struct.int_single)
-// CHECK: i32* nocapture noundef writeonly %output
+// CHECK: i32* nocapture noundef writeonly align 4 %output
   output[0] = input.a;
 }
 
@@ -36,7 +36,7 @@
 // CHECK: spir_kernel
 // AMDGCN: define{{.*}} amdgpu_kernel void @test_pair
 // CHECK: struct.int_pair* nocapture {{.*}} byval(%struct.int_pair)
-// CHECK: i32* nocapture noundef writeonly %output
+// CHECK: i32* nocapture noundef writeonly align 4 %output
   output[0] = (int)input.a;
   output[1] = (int)input.b;
 }
@@ -45,7 +45,7 @@
 // CHECK: spir_kernel
 // AMDGCN: define{{.*}} amdgpu_kernel void @test_kernel
 // CHECK: struct.test_struct* nocapture {{.*}} byval(%struct.test_struct)
-// CHECK: i32* nocapture noundef writeonly %output
+// CHECK: i32* nocapture noundef writeonly align 4 %output
   output[0] = input.elementA;
   output[1] = input.elementB;
   output[2] = (int)input.elementC;
diff --git a/clang/test/CodeGenOpenCL/spir-calling-conv.cl b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
--- a/clang/test/CodeGenOpenCL/spir-calling-conv.cl
+++ b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
@@ -5,14 +5,14 @@
 kernel void bar(global int *A);
 
 kernel void foo(global int *A)
-// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef %A)
+// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef align 4 %A)
 {
   int id = get_dummy_id(0);
   // CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
   A[id] = id;
   bar(A);
-  // CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef %A)
+  // CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef align 4 %A)
 }
 
 // CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
-// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef)
+// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef align 4)