diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -7180,7 +7180,10 @@ return ABIArgInfo::getDirect( CGInfo.getCUDADeviceBuiltinTextureDeviceType()); } - return getNaturalAlignIndirect(Ty, /* byval */ true); + // We want to pass whole aggregate value as one argument. + auto AI = ABIArgInfo::getDirect(); + AI.setCanBeFlattened(false); + return AI; } if (const auto *EIT = Ty->getAs()) { diff --git a/clang/test/CodeGen/nvptx-abi.c b/clang/test/CodeGen/nvptx-abi.c --- a/clang/test/CodeGen/nvptx-abi.c +++ b/clang/test/CodeGen/nvptx-abi.c @@ -21,14 +21,14 @@ void foo(float4_t x) { // CHECK-LABEL: @foo -// CHECK: %struct.float4_s* noundef byval(%struct.float4_s) align 4 %x +// CHECK: (%struct.float4_s %x } void fooN(float4_t x, float4_t y, float4_t z) { // CHECK-LABEL: @fooN -// CHECK: %struct.float4_s* noundef byval(%struct.float4_s) align 4 %x -// CHECK: %struct.float4_s* noundef byval(%struct.float4_s) align 4 %y -// CHECK: %struct.float4_s* noundef byval(%struct.float4_s) align 4 %z +// CHECK-SAME: %struct.float4_s %x +// CHECK-SAME: %struct.float4_s %y +// CHECK-SAME: %struct.float4_s %z } typedef struct nested_s { @@ -39,5 +39,5 @@ void baz(nested_t x) { // CHECK-LABEL: @baz -// CHECK: %struct.nested_s* noundef byval(%struct.nested_s) align 8 %x) +// CHECK: (%struct.nested_s %x } diff --git a/clang/test/CodeGenCUDA/kernel-args-alignment.cu b/clang/test/CodeGenCUDA/kernel-args-alignment.cu --- a/clang/test/CodeGenCUDA/kernel-args-alignment.cu +++ b/clang/test/CodeGenCUDA/kernel-args-alignment.cu @@ -36,5 +36,5 @@ // HOST-OLD: call i32 @cudaSetupArgument({{[^,]*}}, i64 8, i64 24) // DEVICE-LABEL: @_Z6kernelc1SPi -// DEVICE-SAME: i8{{[^,]*}}, %struct.S* noundef byval(%struct.S) align 8{{[^,]*}}, i32* +// DEVICE-SAME: i8{{[^,]*}}, %struct.S %{{[^,]*}}, i32* __global__ void kernel(char a, S s, int *b) {} diff --git a/clang/test/CodeGenCUDA/kernel-args.cu b/clang/test/CodeGenCUDA/kernel-args.cu --- a/clang/test/CodeGenCUDA/kernel-args.cu +++ b/clang/test/CodeGenCUDA/kernel-args.cu @@ -10,14 +10,14 @@ }; // AMDGCN: define{{.*}} amdgpu_kernel void @_Z6kernel1A(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}}) -// NVPTX: define{{.*}} void @_Z6kernel1A(%struct.A* noundef byval(%struct.A) align 8 %x) +// NVPTX: define{{.*}} void @_Z6kernel1A(%struct.A %x __global__ void kernel(A x) { } class Kernel { public: // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}}) - // NVPTX: define{{.*}} void @_ZN6Kernel12memberKernelE1A(%struct.A* noundef byval(%struct.A) align 8 %x) + // NVPTX: define{{.*}} void @_ZN6Kernel12memberKernelE1A(%struct.A %x static __global__ void memberKernel(A x){} template static __global__ void templateMemberKernel(T x) {} }; @@ -31,10 +31,10 @@ void test() { Kernel K; // AMDGCN: define{{.*}} amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}} - // NVPTX: define{{.*}} void @_Z14templateKernelI1AEvT_(%struct.A* noundef byval(%struct.A) align 8 %x) + // NVPTX: define{{.*}} void @_Z14templateKernelI1AEvT_(%struct.A %x launch((void*)templateKernel); // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A addrspace(4)* byref(%struct.A) align 8 %{{.+}} - // NVPTX: define{{.*}} void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* noundef byval(%struct.A) align 8 %x) + // NVPTX: define{{.*}} void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %x launch((void*)Kernel::templateMemberKernel); } diff --git a/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp b/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp --- a/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp +++ b/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp @@ -34,7 +34,7 @@ #pragma omp declare target T a = T(); T f = a; -// CHECK: define{{ protected | }}void @{{.+}}foo{{.+}}([[T]]* noundef byval([[T]]) align {{.+}}) +// CHECK: define{{ protected | }}void @{{.+}}foo{{.+}}([[T]] %{{.+}}) void foo(T a = T()) { return; } @@ -54,7 +54,7 @@ } T1 a1 = T1(); T1 f1 = a1; -// CHECK: define{{ protected | }}void @{{.+}}foo1{{.+}}([[T1]]* noundef byval([[T1]]) align {{.+}}) +// CHECK: define{{ protected | }}void @{{.+}}foo1{{.+}}([[T1]] %{{.+}}) void foo1(T1 a = T1()) { return; } @@ -70,4 +70,3 @@ T1 t = bar1(); } #pragma omp end declare target -