Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -6825,10 +6825,50 @@ namespace { +class AMDGPUABIInfo final : public DefaultABIInfo { +public: + explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + +private: + ABIArgInfo classifyArgumentType(QualType Ty) const; + + void computeInfo(CGFunctionInfo &FI) const override; +}; + +void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + + unsigned CC = FI.getCallingConvention(); + for (auto &Arg : FI.arguments()) + if (CC == llvm::CallingConv::AMDGPU_KERNEL) + Arg.info = classifyArgumentType(Arg.type); + else + Arg.info = DefaultABIInfo::classifyArgumentType(Arg.type); +} + +/// \brief Classify argument of given type \p Ty. +ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const { + llvm::StructType *StrTy = dyn_cast(CGT.ConvertType(Ty)); + if (!StrTy) { + return DefaultABIInfo::classifyArgumentType(Ty); + } + + // Coerce single element structs to its element. + if (StrTy->getNumElements() == 1) { + return ABIArgInfo::getDirect(); + } + + // If we set CanBeFlattened to true, CodeGen will expand the struct to its + // individual elements, which confuses the Clover OpenCL backend; therefore we + // have to set it to false here. Other args of getDirect() are just defaults. + return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); +} + class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + : TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; Index: test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl =================================================================== --- /dev/null +++ test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -0,0 +1,66 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s + +// CHECK-NOT: %struct.single_element_struct_arg = type { i32 } +typedef struct single_element_struct_arg +{ + int i; +} single_element_struct_arg_t; + +// CHECK: %struct.struct_arg = type { i32, float, i32 } +typedef struct struct_arg +{ + int i1; + float f; + int i2; +} struct_arg_t; + +// CHECK: %struct.struct_of_arrays_arg = type { [2 x i32], float, [4 x i32], [3 x float], i32 } +typedef struct struct_of_arrays_arg +{ + int i1[2]; + float f1; + int i2[4]; + float f2[3]; + int i3; +} struct_of_arrays_arg_t; + +// CHECK: %struct.struct_of_structs_arg = type { i32, float, %struct.struct_arg, i32 } +typedef struct struct_of_structs_arg +{ + int i1; + float f1; + struct_arg_t s1; + int i2; +} struct_of_structs_arg_t; + +// CHECK-LABEL: @test_single_element_struct_arg +// CHECK: i32 %arg1.coerce +__kernel void test_single_element_struct_arg(single_element_struct_arg_t arg1) +{ +} + +// CHECK-LABEL: @test_struct_arg +// CHECK: %struct.struct_arg %arg1.coerce +__kernel void test_struct_arg(struct_arg_t arg1) +{ +} + +// CHECK-LABEL: @test_struct_of_arrays_arg +// CHECK: %struct.struct_of_arrays_arg %arg1.coerce +__kernel void test_struct_of_arrays_arg(struct_of_arrays_arg_t arg1) +{ +} + +// CHECK-LABEL: @test_struct_of_structs_arg +// CHECK: %struct.struct_of_structs_arg %arg1.coerce +__kernel void test_struct_of_structs_arg(struct_of_structs_arg_t arg1) +{ +} + +// CHECK-LABEL: @test_non_kernel_struct_arg +// CHECK-NOT: %struct.struct_arg %arg1.coerce +// CHECK: %struct.struct_arg* byval +void test_non_kernel_struct_arg(struct_arg_t arg1) +{ +}