diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -331,6 +331,7 @@
         "Disable recognition of objc_direct methods")
 LANGOPT(CFProtectionBranch , 1, 0, "Control-Flow Branch Protection enabled")
 LANGOPT(FakeAddressSpaceMap , 1, 0, "OpenCL fake address space map")
+LANGOPT(OpenCLForceVectorABI, 1, 0, "OpenCL vector to scalar coercion disabling")
 ENUM_LANGOPT(AddressSpaceMapMangling , AddrSpaceMapMangling, 2, ASMM_Target, "OpenCL address space map mangling mode")
 LANGOPT(IncludeDefaultHeader, 1, 0, "Include default header file for OpenCL")
 LANGOPT(DeclareOpenCLBuiltins, 1, 0, "Declare OpenCL builtin functions")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6260,6 +6260,9 @@
 def fno_bitfield_type_align : Flag<["-"], "fno-bitfield-type-align">,
   HelpText<"Ignore bit-field types when aligning structures">,
   MarshallingInfoFlag<LangOpts<"NoBitFieldTypeAlign">>;
+def fopencl_force_vector_abi : Flag<["-"], "fopencl-force-vector-abi">,
+  HelpText<"Disable vector to scalar coercion for OpenCL">,
+  MarshallingInfoFlag<LangOpts<"OpenCLForceVectorABI">>;
 def ffake_address_space_map : Flag<["-"], "ffake-address-space-map">,
   HelpText<"Use a fake address space map; OpenCL testing purposes only">,
   MarshallingInfoFlag<LangOpts<"FakeAddressSpaceMap">>;
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -100,6 +100,51 @@
   return Address::invalid();
 }
 
+/// Classify a single return or argument type for OpenCL without any
+/// target-specific coercion: void is ignored, an enum is classified as its
+/// underlying integer type, a record is passed indirectly (not byval) with
+/// the alignment of its type, a promotable integer is extended, and anything
+/// else (vectors included) is passed directly.
+static ABIArgInfo classifyOpenCL(QualType Ty, ASTContext &Context) {
+  if (Ty->isVoidType())
+    return ABIArgInfo::getIgnore();
+
+  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+    Ty = EnumTy->getDecl()->getIntegerType();
+
+  if (const RecordType *RT = Ty->getAs<RecordType>())
+    return ABIArgInfo::getIndirect(Context.getTypeAlignInChars(RT),
+                                   /*ByVal=*/false);
+
+  if (Context.isPromotableIntegerType(Ty))
+    return ABIArgInfo::getExtend(Ty);
+
+  return ABIArgInfo::getDirect();
+}
+
+/// Classify the return value and all arguments of \p FI with classifyOpenCL
+/// when compiling OpenCL with -fopencl-force-vector-abi.
+///
+/// \returns true if \p FI was fully classified here and the caller must skip
+/// its regular ABI classification, false if \p FI was left untouched.
+static bool doOpenCLClassification(CGFunctionInfo &FI, ASTContext &Context) {
+  if (!Context.getLangOpts().OpenCL)
+    return false;
+  if (!Context.getLangOpts().OpenCLForceVectorABI)
+    return false;
+
+  // Use the OpenCL classification to prevent coercion. The vector ABI must
+  // be enforced by enabling the corresponding option; otherwise, vector
+  // types will be coerced to a matching integer type to conform with the
+  // ABI, e.g.: <8 x i8> will be coerced to i64.
+  FI.getReturnInfo() = classifyOpenCL(FI.getReturnType(), Context);
+
+  for (auto &Arg : FI.arguments())
+    Arg.info = classifyOpenCL(Arg.type, Context);
+
+  return true;
+}
+
 static llvm::Type *getVAListElementType(CodeGenFunction &CGF) {
   return CGF.ConvertTypeForMem(
       CGF.getContext().getBuiltinVaListType()->getPointeeType());
@@ -1964,6 +2009,10 @@
 }
 
 void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  ASTContext &Context = getContext();
+  if (doOpenCLClassification(FI, Context))
+    return;
+
   CCState State(FI);
   if (IsMCUABI)
     State.FreeRegs = 3;
@@ -3950,6 +3999,9 @@
 }
 
 void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  ASTContext &Context = getContext();
+  if (doOpenCLClassification(FI, Context))
+    return;
 
   const unsigned CallingConv = FI.getCallingConvention();
   // It is possible to force Win64 calling convention on any x86_64 target by
@@ -4407,6 +4459,10 @@
 }
 
 void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  ASTContext &Context = getContext();
+  if (doOpenCLClassification(FI, Context))
+    return;
+
   const unsigned CC = FI.getCallingConvention();
   bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
   bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3832,6 +3832,8 @@
     }
   }
 
+  Opts.OpenCLForceVectorABI = Args.hasArg(OPT_fopencl_force_vector_abi);
+
   // Check if -fopenmp is specified and set default version to 5.0.
   Opts.OpenMP = Args.hasArg(OPT_fopenmp) ? 50 : 0;
   // Check if -fopenmp-simd is specified.
diff --git a/clang/test/CodeGenOpenCL/vector-to-scalar-coercion.cl b/clang/test/CodeGenOpenCL/vector-to-scalar-coercion.cl
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/vector-to-scalar-coercion.cl
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -x cl -triple i686-unknown-unknown -fopencl-force-vector-abi %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER
+// RUN: %clang_cc1 -x cl -triple x86_64-unknown-unknown -fopencl-force-vector-abi %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix NOCOER
+// RUN: %clang_cc1 -x cl -triple i686-unknown-unknown %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER32CL
+// RUN: %clang_cc1 -x cl -triple x86_64-unknown-unknown %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER64CL
+// RUN: %clang_cc1 -x c -triple i686-unknown-unknown %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER32
+// RUN: %clang_cc1 -x c -triple x86_64-unknown-unknown %s -O0 -emit-llvm -o - | FileCheck %s --check-prefix COER64
+
+typedef unsigned short ushort;
+typedef ushort ushort4 __attribute__((ext_vector_type(4)));
+
+typedef unsigned long ulong;
+typedef ulong ulong4 __attribute__((ext_vector_type(4)));
+
+ulong4 __attribute__((const)) __attribute__((overloadable)) convert_ulong4_rte(ushort4 x)
+{
+  return 1;
+}
+
+// NOCOER: define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(<4 x i16> noundef %{{.*}})
+// COER32CL: define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(i64 noundef %{{.*}})
+// COER64CL: define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(double noundef %{{.*}})
+// COER32: define {{.*}}<4 x i32> @_Z18convert_ulong4_rteDv4_t(i64 noundef %{{.*}})
+// COER64: define {{.*}}<4 x i64> @_Z18convert_ulong4_rteDv4_t(double noundef %{{.*}})