diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -488,7 +488,11 @@ B == LangAS::opencl_global_host)) || // Consider pointer size address spaces to be equivalent to default. ((isPtrSizeAddressSpace(A) || A == LangAS::Default) && - (isPtrSizeAddressSpace(B) || B == LangAS::Default)); + (isPtrSizeAddressSpace(B) || B == LangAS::Default)) || + // Default is a superset of SYCL address spaces + (A == LangAS::Default && + (B == LangAS::sycl_private || B == LangAS::sycl_local || + B == LangAS::sycl_global)); } /// Returns true if the address space in these qualifiers is equal to or diff --git a/clang/include/clang/Basic/AddressSpaces.h b/clang/include/clang/Basic/AddressSpaces.h --- a/clang/include/clang/Basic/AddressSpaces.h +++ b/clang/include/clang/Basic/AddressSpaces.h @@ -44,6 +44,11 @@ cuda_constant, cuda_shared, + // SYCL specific address spaces. + sycl_global, + sycl_local, + sycl_private, + // Pointer size and extension address spaces. ptr32_sptr, ptr32_uptr, diff --git a/clang/include/clang/Sema/ParsedAttr.h b/clang/include/clang/Sema/ParsedAttr.h --- a/clang/include/clang/Sema/ParsedAttr.h +++ b/clang/include/clang/Sema/ParsedAttr.h @@ -598,8 +598,8 @@ /// a Spelling enumeration, the value UINT_MAX is returned. unsigned getSemanticSpelling() const; - /// If this is an OpenCL addr space attribute returns its representation - /// in LangAS, otherwise returns default addr space. + /// If this is an OpenCL address space attribute returns its representation + /// in LangAS, otherwise returns default address space. LangAS asOpenCLLangAS() const { switch (getParsedKind()) { case ParsedAttr::AT_OpenCLConstantAddressSpace: @@ -621,6 +621,22 @@ } } + /// If this is an OpenCL address space attribute returns its SYCL + /// representation in LangAS, otherwise returns default address space. + LangAS asSYCLLangAS() const { + switch (getKind()) { + case ParsedAttr::AT_OpenCLGlobalAddressSpace: + return LangAS::sycl_global; + case ParsedAttr::AT_OpenCLLocalAddressSpace: + return LangAS::sycl_local; + case ParsedAttr::AT_OpenCLPrivateAddressSpace: + return LangAS::sycl_private; + case ParsedAttr::AT_OpenCLGenericAddressSpace: + default: + return LangAS::Default; + } + } + AttributeCommonInfo::Kind getKind() const { return AttributeCommonInfo::Kind(Info.AttrKind); } diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -930,6 +930,9 @@ 7, // cuda_device 8, // cuda_constant 9, // cuda_shared + 1, // sycl_global + 3, // sycl_local + 0, // sycl_private 10, // ptr32_sptr 11, // ptr32_uptr 12 // ptr64 diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2376,7 +2376,7 @@ if (Context.getASTContext().addressSpaceMapManglingFor(AS)) { // ::= "AS" unsigned TargetAS = Context.getASTContext().getTargetAddressSpace(AS); - if (TargetAS != 0) + if (TargetAS != 0 || (Context.getASTContext().getLangOpts().SYCLIsDevice)) ASString = "AS" + llvm::utostr(TargetAS); } else { switch (AS) { diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -2110,10 +2110,13 @@ case LangAS::Default: return ""; case LangAS::opencl_global: + case LangAS::sycl_global: return "__global"; case LangAS::opencl_local: + case LangAS::sycl_local: return "__local"; case LangAS::opencl_private: + case LangAS::sycl_private: return "__private"; case LangAS::opencl_constant: return "__constant"; diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -583,18 +583,10 @@ return new X86_64TargetInfo(Triple, Opts); } - case llvm::Triple::spir: { - if (Triple.getOS() != llvm::Triple::UnknownOS || - Triple.getEnvironment() != llvm::Triple::UnknownEnvironment) - return nullptr; + case llvm::Triple::spir: return new SPIR32TargetInfo(Triple, Opts); - } - case llvm::Triple::spir64: { - if (Triple.getOS() != llvm::Triple::UnknownOS || - Triple.getEnvironment() != llvm::Triple::UnknownEnvironment) - return nullptr; + case llvm::Triple::spir64: return new SPIR64TargetInfo(Triple, Opts); - } case llvm::Triple::wasm32: if (Triple.getSubArch() != llvm::Triple::NoSubArch || Triple.getVendor() != llvm::Triple::UnknownVendor || diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -51,6 +51,9 @@ Global, // cuda_device Constant, // cuda_constant Local, // cuda_shared + Global, // sycl_global + Local, // sycl_local + Private, // sycl_private Generic, // ptr32_sptr Generic, // ptr32_uptr Generic // ptr64 @@ -68,6 +71,9 @@ Global, // cuda_device Constant, // cuda_constant Local, // cuda_shared + Global, // sycl_global + Local, // sycl_local + Private, // sycl_private Generic, // ptr32_sptr Generic, // ptr32_uptr Generic // ptr64 diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -35,6 +35,9 @@ 1, // cuda_device 4, // cuda_constant 3, // cuda_shared + 1, // sycl_global + 3, // sycl_local + 0, // sycl_private 0, // ptr32_sptr 0, // ptr32_uptr 0 // ptr64 diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -33,6 +33,29 @@ 0, // cuda_device 0, // cuda_constant 0, // cuda_shared + 1, // sycl_global + 3, // sycl_local + 0, // sycl_private + 0, // ptr32_sptr + 0, // ptr32_uptr + 0 // ptr64 +}; + +static const unsigned SYCLAddrSpaceMap[] = { + 4, // Default + 1, // opencl_global + 3, // opencl_local + 2, // opencl_constant + 0, // opencl_private + 4, // opencl_generic + 5, // opencl_global_device + 6, // opencl_global_host + 0, // cuda_device + 0, // cuda_constant + 0, // cuda_shared + 1, // sycl_global + 3, // sycl_local + 0, // sycl_private 0, // ptr32_sptr 0, // ptr32_uptr 0 // ptr64 @@ -42,14 +65,14 @@ public: SPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &) : TargetInfo(Triple) { - assert(getTriple().getOS() == llvm::Triple::UnknownOS && - "SPIR target must use unknown OS"); - assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment && - "SPIR target must use unknown environment type"); TLSSupported = false; VLASupported = false; LongWidth = LongAlign = 64; - AddrSpaceMap = &SPIRAddrSpaceMap; + if (Triple.getEnvironment() == llvm::Triple::SYCLDevice) { + AddrSpaceMap = &SYCLAddrSpaceMap; + } else { + AddrSpaceMap = &SPIRAddrSpaceMap; + } UseAddrSpaceMapMangling = true; HasLegalHalfType = true; HasFloat16 = true; @@ -97,6 +120,16 @@ return CC_SpirFunction; } + llvm::Optional getConstantAddressSpace() const override { + // If we assign "opencl_constant" address space the following code becomes + // illegal, because it can't be cast to any other address space: + // + // const char *getLiteral() { + // return "AB"; + // } + return LangAS::sycl_global; + } + void setSupportedOpenCLOpts() override { // Assume all OpenCL extensions and optional core features are supported // for SPIR since it is a generic target. @@ -107,6 +140,7 @@ bool hasInt128Type() const override { return false; } }; + class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo { public: SPIR32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) diff --git a/clang/lib/Basic/Targets/TCE.h b/clang/lib/Basic/Targets/TCE.h --- a/clang/lib/Basic/Targets/TCE.h +++ b/clang/lib/Basic/Targets/TCE.h @@ -42,6 +42,9 @@ 0, // cuda_device 0, // cuda_constant 0, // cuda_shared + 3, // sycl_global + 4, // sycl_local + 0, // sycl_private 0, // ptr32_sptr 0, // ptr32_uptr 0, // ptr64 diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -35,6 +35,9 @@ 0, // cuda_device 0, // cuda_constant 0, // cuda_shared + 0, // sycl_global + 0, // sycl_local + 0, // sycl_private 270, // ptr32_sptr 271, // ptr32_uptr 272 // ptr64 diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9941,7 +9941,17 @@ public: SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} + + LangAS getASTAllocaAddressSpace() const override { + return getLangASFromTargetAS( + getABIInfo().getDataLayout().getAllocaAddrSpace()); + } + + LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const override; + unsigned getOpenCLKernelCallingConv() const override; + bool shouldEmitStaticExternCAliases() const override; }; } // End anonymous namespace. @@ -9963,6 +9973,32 @@ return llvm::CallingConv::SPIR_KERNEL; } +bool SPIRTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { + return false; +} + +LangAS SPIRTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const { + assert(!CGM.getLangOpts().OpenCL && + !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + LangAS DefaultGlobalAS = getLangASFromTargetAS( + CGM.getContext().getTargetAddressSpace(LangAS::sycl_global)); + if (!D) + return DefaultGlobalAS; + + LangAS AddrSpace = D->getType().getAddressSpace(); + assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace)); + if (AddrSpace != LangAS::Default) + return AddrSpace; + + if (CGM.isTypeConstant(D->getType(), false)) { + if (auto ConstAS = CGM.getTarget().getConstantAddressSpace()) + return ConstAS.getValue(); + } + return DefaultGlobalAS; +} + static bool appendType(SmallStringEnc &Enc, QualType QType, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -6369,6 +6369,7 @@ llvm::APSInt max(addrSpace.getBitWidth()); max = Qualifiers::MaxAddressSpace - (unsigned)LangAS::FirstTargetAddressSpace; + if (addrSpace > max) { S.Diag(AttrLoc, diag::err_attribute_address_space_too_high) << (unsigned)max.getZExtValue() << AddrSpace->getSourceRange(); @@ -6484,7 +6485,9 @@ Attr.setInvalid(); } else { // The keyword-based type attributes imply which address space to use. - ASIdx = Attr.asOpenCLLangAS(); + ASIdx = S.getLangOpts().SYCLIsDevice ? Attr.asSYCLLangAS() + : Attr.asOpenCLLangAS(); + if (ASIdx == LangAS::Default) llvm_unreachable("Invalid address space"); diff --git a/clang/test/CodeGenSYCL/address-space-cond-op.cpp b/clang/test/CodeGenSYCL/address-space-cond-op.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenSYCL/address-space-cond-op.cpp @@ -0,0 +1,49 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -x c++ -triple spir64-unknown-linux-sycldevice -disable-llvm-passes -fsycl -fsycl-is-device -emit-llvm %s -o - | FileCheck %s + +struct S { + unsigned short x; +}; + +// CHECK-LABEL: define {{.*}} @_Z3foobR1SS_( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[COND_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[COND_ADDR_ASCAST:%.*]] = addrspacecast i8* [[COND_ADDR]] to i8 addrspace(4)* +// CHECK-NEXT: [[LHS_ADDR:%.*]] = alloca [[STRUCT_S:%.*]] addrspace(4)*, align 8 +// CHECK-NEXT: [[LHS_ADDR_ASCAST:%.*]] = addrspacecast [[STRUCT_S]] addrspace(4)** [[LHS_ADDR]] to [[STRUCT_S]] addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[COND:%.*]] to i8 +// CHECK-NEXT: store i8 [[FROMBOOL]], i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1 +// CHECK-NEXT: store [[STRUCT_S]] addrspace(4)* [[LHS:%.*]], [[STRUCT_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[RHS_ASCAST:%.*]] = addrspacecast %struct.S* [[RHS:%.*]] to [[STRUCT_S]] addrspace(4)* +// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1 +// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP1:%.*]] = load [[STRUCT_S]] addrspace(4)*, [[STRUCT_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND_LVALUE:%.*]] = phi [[STRUCT_S]] addrspace(4)* [ [[TMP1]], [[COND_TRUE]] ], [ [[RHS_ASCAST]], [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[STRUCT_S]] addrspace(4)* [[AGG_RESULT:%.*]] to i8 addrspace(4)* +// CHECK-NEXT: [[TMP3:%.*]] = bitcast [[STRUCT_S]] addrspace(4)* [[COND_LVALUE]] to i8 addrspace(4)* +// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 2 [[TMP2]], i8 addrspace(4)* align 2 [[TMP3]], i64 2, i1 false) +// CHECK-NEXT: ret void +// +S foo(bool cond, S &lhs, S rhs) { + S val = cond ? lhs : rhs; + return val; +} + +template +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + kernel([]() { + S lhs, rhs; + foo(true, lhs, rhs); + }); + return 0; +} diff --git a/clang/test/CodeGenSYCL/address-space-of-returns.cpp b/clang/test/CodeGenSYCL/address-space-of-returns.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenSYCL/address-space-of-returns.cpp @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -triple spir64-unknown-linux-sycldevice -fsycl -fsycl-is-device -disable-llvm-passes -emit-llvm -x c++ %s -o - | FileCheck %s + +struct A { + int B[42]; +}; + +const char *ret_char() { + return "N"; +} +// CHECK: ret i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* addrspacecast ([2 x i8] addrspace(1)* @.str to [2 x i8] addrspace(4)*), i64 0, i64 0) + +const char *ret_arr() { + static const char Arr[42] = {0}; + return Arr; +} +// CHECK: ret i8 addrspace(4)* getelementptr inbounds ([42 x i8], [42 x i8] addrspace(4)* addrspacecast ([42 x i8] addrspace(1)* @{{.*}}ret_arr{{.*}}Arr to [42 x i8] addrspace(4)*), i64 0, i64 0) + +const char &ret_ref() { + static const char a = 'A'; + return a; +} +// CHECK: ret i8 addrspace(4)* addrspacecast (i8 addrspace(1)* @{{.*}}ret_ref{{.*}} to i8 addrspace(4)*) + +A ret_agg() { + A a; + return a; +} +// CHECK: define{{.*}} spir_func void @{{.*}}ret_agg{{.*}}(%struct.A addrspace(4)* noalias sret(%struct.A) align 4 %agg.result) + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} + +int main() { + kernel_single_task([]() { + ret_char(); + ret_arr(); + ret_ref(); + ret_agg(); + }); + return 0; +} diff --git a/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp b/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp @@ -0,0 +1,131 @@ +// RUN: %clang_cc1 -triple spir64-unknown-linux-sycldevice -fsycl -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +void bar(int &Data) {} +// CHECK-DAG: define{{.*}} spir_func void @[[RAW_REF:[a-zA-Z0-9_]+]](i32 addrspace(4)* align 4 dereferenceable(4) % +void bar2(int &Data) {} +// CHECK-DAG: define{{.*}} spir_func void @[[RAW_REF2:[a-zA-Z0-9_]+]](i32 addrspace(4)* align 4 dereferenceable(4) % +void bar(__attribute__((opencl_local)) int &Data) {} +// CHECK-DAG: define{{.*}} spir_func void [[LOC_REF:@[a-zA-Z0-9_]+]](i32 addrspace(3)* align 4 dereferenceable(4) % +void foo(int *Data) {} +// CHECK-DAG: define{{.*}} spir_func void @[[RAW_PTR:[a-zA-Z0-9_]+]](i32 addrspace(4)* % +void foo2(int *Data) {} +// CHECK-DAG: define{{.*}} spir_func void @[[RAW_PTR2:[a-zA-Z0-9_]+]](i32 addrspace(4)* % +void foo(__attribute__((opencl_local)) int *Data) {} +// CHECK-DAG: define{{.*}} spir_func void [[LOC_PTR:@[a-zA-Z0-9_]+]](i32 addrspace(3)* % + +template +void tmpl(T t) {} +// See Check Lines below. + +void usages() { + // CHECK-DAG: [[GLOB:%[a-zA-Z0-9]+]] = alloca i32 addrspace(1)* + // CHECK-DAG: [[GLOB]].ascast = addrspacecast i32 addrspace(1)** [[GLOB]] to i32 addrspace(1)* addrspace(4)* + __attribute__((opencl_global)) int *GLOB; + // CHECK-DAG: [[LOC:%[a-zA-Z0-9]+]] = alloca i32 addrspace(3)* + // CHECK-DAG: [[LOC]].ascast = addrspacecast i32 addrspace(3)** [[LOC]] to i32 addrspace(3)* addrspace(4)* + __attribute__((opencl_local)) int *LOC; + // CHECK-DAG: [[NoAS:%[a-zA-Z0-9]+]] = alloca i32 addrspace(4)* + // CHECK-DAG: [[NoAS]].ascast = addrspacecast i32 addrspace(4)** [[NoAS]] to i32 addrspace(4)* addrspace(4)* + int *NoAS; + // CHECK-DAG: [[PRIV:%[a-zA-Z0-9]+]] = alloca i32* + // CHECK-DAG: [[PRIV]].ascast = addrspacecast i32** [[PRIV]] to i32* addrspace(4)* + __attribute__((opencl_private)) int *PRIV; + + bar(*GLOB); + // CHECK-DAG: [[GLOB_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[GLOB]].ascast + // CHECK-DAG: [[GLOB_CAST:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(1)* [[GLOB_LOAD]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* align 4 dereferenceable(4) [[GLOB_CAST]]) + bar2(*GLOB); + // CHECK-DAG: [[GLOB_LOAD2:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[GLOB]].ascast + // CHECK-DAG: [[GLOB_CAST2:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(1)* [[GLOB_LOAD2]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_REF2]](i32 addrspace(4)* align 4 dereferenceable(4) [[GLOB_CAST2]]) + + bar(*LOC); + // CHECK-DAG: [[LOC_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LOC]].ascast + // CHECK-DAG: call spir_func void [[LOC_REF]](i32 addrspace(3)* align 4 dereferenceable(4) [[LOC_LOAD]]) + bar2(*LOC); + // CHECK-DAG: [[LOC_LOAD2:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LOC]].ascast + // CHECK-DAG: [[LOC_CAST2:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(3)* [[LOC_LOAD2]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_REF2]](i32 addrspace(4)* align 4 dereferenceable(4) [[LOC_CAST2]]) + + bar(*NoAS); + // CHECK-DAG: [[NoAS_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[NoAS]].ascast + // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* align 4 dereferenceable(4) [[NoAS_LOAD]]) + bar2(*NoAS); + // CHECK-DAG: [[NoAS_LOAD2:%[a-zA-Z0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[NoAS]].ascast + // CHECK-DAG: call spir_func void @[[RAW_REF2]](i32 addrspace(4)* align 4 dereferenceable(4) [[NoAS_LOAD2]]) + + foo(GLOB); + // CHECK-DAG: [[GLOB_LOAD3:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[GLOB]].ascast + // CHECK-DAG: [[GLOB_CAST3:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(1)* [[GLOB_LOAD3]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_PTR]](i32 addrspace(4)* [[GLOB_CAST3]]) + foo2(GLOB); + // CHECK-DAG: [[GLOB_LOAD4:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[GLOB]].ascast + // CHECK-DAG: [[GLOB_CAST4:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(1)* [[GLOB_LOAD4]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_PTR2]](i32 addrspace(4)* [[GLOB_CAST4]]) + foo(LOC); + // CHECK-DAG: [[LOC_LOAD3:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LOC]].ascast + // CHECK-DAG: call spir_func void [[LOC_PTR]](i32 addrspace(3)* [[LOC_LOAD3]]) + foo2(LOC); + // CHECK-DAG: [[LOC_LOAD4:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LOC]].ascast + // CHECK-DAG: [[LOC_CAST4:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(3)* [[LOC_LOAD4]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_PTR2]](i32 addrspace(4)* [[LOC_CAST4]]) + foo(NoAS); + // CHECK-DAG: [[NoAS_LOAD3:%[a-zA-Z0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[NoAS]].ascast + // CHECK-DAG: call spir_func void @[[RAW_PTR]](i32 addrspace(4)* [[NoAS_LOAD3]]) + foo2(NoAS); + // CHECK-DAG: [[NoAS_LOAD4:%[a-zA-Z0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[NoAS]].ascast + // CHECK-DAG: call spir_func void @[[RAW_PTR2]](i32 addrspace(4)* [[NoAS_LOAD4]]) + + // Ensure that we still get 3 different template instantiations. + tmpl(GLOB); + // CHECK-DAG: [[GLOB_LOAD4:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[GLOB]].ascast + // CHECK-DAG: call spir_func void [[GLOB_TMPL:@[a-zA-Z0-9_]+]](i32 addrspace(1)* [[GLOB_LOAD4]]) + tmpl(LOC); + // CHECK-DAG: [[LOC_LOAD5:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LOC]].ascast + // CHECK-DAG: call spir_func void [[LOC_TMPL:@[a-zA-Z0-9_]+]](i32 addrspace(3)* [[LOC_LOAD5]]) + tmpl(PRIV); + // CHECK-DAG: [[PRIV_LOAD5:%[a-zA-Z0-9]+]] = load i32*, i32* addrspace(4)* [[PRIV]].ascast + // CHECK-DAG: call spir_func void [[PRIV_TMPL:@[a-zA-Z0-9_]+]](i32* [[PRIV_LOAD5]]) + tmpl(NoAS); + // CHECK-DAG: [[NoAS_LOAD5:%[a-zA-Z0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[NoAS]].ascast + // CHECK-DAG: call spir_func void [[GEN_TMPL:@[a-zA-Z0-9_]+]](i32 addrspace(4)* [[NoAS_LOAD5]]) +} + +// CHECK-DAG: define linkonce_odr spir_func void [[GLOB_TMPL]](i32 addrspace(1)* % +// CHECK-DAG: define linkonce_odr spir_func void [[LOC_TMPL]](i32 addrspace(3)* % +// CHECK-DAG: define linkonce_odr spir_func void [[PRIV_TMPL]](i32* % +// CHECK-DAG: define linkonce_odr spir_func void [[GEN_TMPL]](i32 addrspace(4)* % + +void usages2() { + __attribute__((opencl_private)) int *PRIV; + // CHECK-DAG: [[PRIV:%[a-zA-Z0-9]+]] = alloca i32* + // CHECK-DAG: [[PRIV]].ascast = addrspacecast i32** [[PRIV]] to i32* addrspace(4)* + __attribute__((opencl_global)) int *GLOB; + // CHECK-DAG: [[GLOB:%[a-zA-Z0-9]+]] = alloca i32 addrspace(1)* + // CHECK-DAG: [[GLOB]].ascast = addrspacecast i32 addrspace(1)** [[GLOB]] to i32 addrspace(1)* addrspace(4)* + __attribute__((opencl_local)) int *LOCAL; + // CHECK-DAG: [[LOCAL:%[a-zA-Z0-9]+]] = alloca i32 addrspace(3)* + // CHECK-DAG: [[LOCAL]].ascast = addrspacecast i32 addrspace(3)** [[LOCAL]] to i32 addrspace(3)* addrspace(4)* + + bar(*PRIV); + // CHECK-DAG: [[PRIV_LOAD:%[a-zA-Z0-9]+]] = load i32*, i32* addrspace(4)* [[PRIV]].ascast + // CHECK-DAG: [[PRIV_ASCAST:%[a-zA-Z0-9]+]] = addrspacecast i32* [[PRIV_LOAD]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* align 4 dereferenceable(4) [[PRIV_ASCAST]]) + bar(*GLOB); + // CHECK-DAG: [[GLOB_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[GLOB]].ascast + // CHECK-DAG: [[GLOB_CAST:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(1)* [[GLOB_LOAD]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* align 4 dereferenceable(4) [[GLOB_CAST]]) + bar2(*LOCAL); + // CHECK-DAG: [[LOCAL_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LOCAL]].ascast + // CHECK-DAG: [[LOCAL_CAST:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(3)* [[LOCAL_LOAD]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_REF2]](i32 addrspace(4)* align 4 dereferenceable(4) [[LOCAL_CAST]]) +} + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} +int main() { + kernel_single_task([]() { usages();usages2(); }); + return 0; +} diff --git a/clang/test/CodeGenSYCL/address-spaces-struct.cpp b/clang/test/CodeGenSYCL/address-spaces-struct.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenSYCL/address-spaces-struct.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -triple spir64-unknown-linux-sycldevice -fsycl -fsycl-is-device -disable-llvm-passes -emit-llvm -x c++ %s -o - | FileCheck %s + +struct Padding { + int i, j; +}; + +struct HasX { + int x; +}; + +struct Y : Padding, HasX {}; + +void bar(HasX &hx); + +// CHECK-LABEL: @_Z3baz{{.*}} +void baz(Y &y) { + // CHECK: %[[FIRST:[a-zA-Z0-9]+]] = bitcast %struct.Y addrspace(4)* %{{.*}} to i8 addrspace(4)* + // CHECK: %[[OFFSET:[a-zA-Z0-9]+]].ptr = getelementptr inbounds i8, i8 addrspace(4)* %[[FIRST]], i64 8 + // CHECK: %[[SECOND:[a-zA-Z0-9]+]] = bitcast i8 addrspace(4)* %[[OFFSET]].ptr to %struct.HasX addrspace(4)* + // CHECK: call spir_func void @{{.*}}bar{{.*}}(%struct.HasX addrspace(4)* align 4 dereferenceable(4) %[[SECOND]]) + bar(y); +} + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} + +int main() { + kernel_single_task([]() { + Y yy; + baz(yy); + }); + return 0; +} diff --git a/clang/test/CodeGenSYCL/address-spaces.cpp b/clang/test/CodeGenSYCL/address-spaces.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenSYCL/address-spaces.cpp @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -triple spir64-unknown-linux-sycldevice -fsycl -fsycl-is-device -disable-llvm-passes -emit-llvm -x c++ %s -o - | FileCheck %s + +// CHECK: @_ZZ4testvE3foo = internal addrspace(1) constant i32 66, align 4 +// CHECK: @[[STR:[.a-zA-Z0-9_]+]] = private unnamed_addr addrspace(1) constant [14 x i8] c"Hello, world!\00", align 1 + +// CHECK-LABEL: @_Z4testv +void test() { + static const int foo = 0x42; + + // CHECK: %i.ascast = addrspacecast i32* %i to i32 addrspace(4)* + // CHECK: %[[ARR:[a-zA-Z0-9]+]] = alloca [42 x i32] + // CHECK: %[[ARR]].ascast = addrspacecast [42 x i32]* %[[ARR]] to [42 x i32] addrspace(4)* + + int i = 0; + int *pptr = &i; + // CHECK: store i32 addrspace(4)* %i.ascast, i32 addrspace(4)* addrspace(4)* %pptr.ascast + bool is_i_ptr = (pptr == &i); + // CHECK: %[[VALPPTR:[0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %pptr.ascast + // CHECK: %cmp{{[0-9]*}} = icmp eq i32 addrspace(4)* %[[VALPPTR]], %i.ascast + *pptr = foo; + + int var23 = 23; + char *cp = (char *)&var23; + *cp = 41; + // CHECK: store i32 23, i32 addrspace(4)* %[[VAR:[a-zA-Z0-9.]+]] + // CHECK: [[VARCAST:%.*]] = bitcast i32 addrspace(4)* %[[VAR]] to i8 addrspace(4)* + // CHECK: store i8 addrspace(4)* [[VARCAST]], i8 addrspace(4)* addrspace(4)* %{{.*}} + + int arr[42]; + char *cpp = (char *)arr; + *cpp = 43; + // CHECK: [[ARRDECAY:%.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* %[[ARR]].ascast, i64 0, i64 0 + // CHECK: [[ARRCAST:%.*]] = bitcast i32 addrspace(4)* [[ARRDECAY]] to i8 addrspace(4)* + // CHECK: store i8 addrspace(4)* [[ARRCAST]], i8 addrspace(4)* addrspace(4)* %{{.*}} + + int *aptr = arr + 10; + if (aptr < arr + sizeof(arr)) + *aptr = 44; + // CHECK: %[[VALAPTR:.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %aptr.ascast + // CHECK: %[[ARRDCY2:.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* %[[ARR]].ascast, i64 0, i64 0 + // CHECK: %[[ADDPTR:.*]] = getelementptr inbounds i32, i32 addrspace(4)* %[[ARRDCY2]], i64 168 + // CHECK: %cmp{{[0-9]+}} = icmp ult i32 addrspace(4)* %[[VALAPTR]], %[[ADDPTR]] + + const char *str = "Hello, world!"; + // CHECK: store i8 addrspace(4)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(4)* addrspacecast ([14 x i8] addrspace(1)* @[[STR]] to [14 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* addrspace(4)* %[[STRVAL:[a-zA-Z0-9]+]].ascast, align 8 + + i = str[0]; + + const char *phi_str = i > 2 ? str : "Another hello world!"; + (void)phi_str; + // CHECK: %[[COND:[a-zA-Z0-9]+]] = icmp sgt i32 %{{.*}}, 2 + // CHECK: br i1 %[[COND]], label %[[CONDTRUE:[.a-zA-Z0-9]+]], label %[[CONDFALSE:[.a-zA-Z0-9]+]] + + // CHECK: [[CONDTRUE]]: + // CHECK-NEXT: %[[VALTRUE:[a-zA-Z0-9]+]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %str.ascast + // CHECK-NEXT: br label %[[CONDEND:[.a-zA-Z0-9]+]] + + // CHECK: [[CONDFALSE]]: + + // CHECK: [[CONDEND]]: + // CHECK-NEXT: phi i8 addrspace(4)* [ %[[VALTRUE]], %[[CONDTRUE]] ], [ getelementptr inbounds ([21 x i8], [21 x i8] addrspace(4)* addrspacecast ([21 x i8] addrspace(1)* @{{.*}} to [21 x i8] addrspace(4)*), i64 0, i64 0), %[[CONDFALSE]] ] + + const char *select_null = i > 2 ? "Yet another Hello world" : nullptr; + (void)select_null; + // CHECK: select i1 %{{.*}}, i8 addrspace(4)* getelementptr inbounds ([24 x i8], [24 x i8] addrspace(4)* addrspacecast ([24 x i8] addrspace(1)* @{{.*}} to [24 x i8] addrspace(4)*), i64 0, i64 0) + + const char *select_str_trivial1 = true ? str : "Another hello world!"; + (void)select_str_trivial1; + // CHECK: %[[TRIVIALTRUE:[a-zA-Z0-9]+]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[STRVAL]] + // CHECK: store i8 addrspace(4)* %[[TRIVIALTRUE]], i8 addrspace(4)* addrspace(4)* %{{.*}}, align 8 + + const char *select_str_trivial2 = false ? str : "Another hello world!"; + (void)select_str_trivial2; +} + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} + +int main() { + kernel_single_task([]() { test(); }); + return 0; +} diff --git a/clang/test/SemaSYCL/address-space-parameter-conversions.cpp b/clang/test/SemaSYCL/address-space-parameter-conversions.cpp new file mode 100644 --- /dev/null +++ b/clang/test/SemaSYCL/address-space-parameter-conversions.cpp @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -verify -fsyntax-only -x c++ %s + +void bar(int &Data) {} +void bar2(int &Data) {} +void bar(__attribute__((opencl_private)) int &Data) {} +void foo(int *Data) {} +void foo2(int *Data) {} +void foo(__attribute__((opencl_private)) int *Data) {} + +template +void tmpl(T *t) {} + +void usages() { + __attribute__((opencl_global)) int *GLOB; + __attribute__((opencl_private)) int *PRIV; + __attribute__((opencl_local)) int *LOC; + int *NoAS; + + bar(*GLOB); + bar2(*GLOB); + + bar(*PRIV); + bar2(*PRIV); + + bar(*NoAS); + bar2(*NoAS); + + bar(*LOC); + bar2(*LOC); + + foo(GLOB); + foo2(GLOB); + foo(PRIV); + foo2(PRIV); + foo(NoAS); + foo2(NoAS); + foo(LOC); + foo2(LOC); + + tmpl(GLOB); + tmpl(PRIV); + tmpl(NoAS); + tmpl(LOC); + + (void)static_cast(GLOB); + (void)static_cast(GLOB); + // FIXME: determine if we can warn on the below conversions. + int *i = GLOB; + void *v = GLOB; + (void)i; + (void)v; + + __generic int *IsGeneric; // expected-error{{unknown type name '__generic'}} + __private int *IsPrivate; // expected-error{{unknown type name '__private'}} + __global int *IsGlobal; // expected-error{{unknown type name '__global'}} + __local int *IsLocal; // expected-error{{unknown type name '__local'}} +} diff --git a/clang/test/SemaTemplate/address_space-dependent.cpp b/clang/test/SemaTemplate/address_space-dependent.cpp --- a/clang/test/SemaTemplate/address_space-dependent.cpp +++ b/clang/test/SemaTemplate/address_space-dependent.cpp @@ -43,7 +43,7 @@ template void tooBig() { - __attribute__((address_space(I))) int *bounds; // expected-error {{address space is larger than the maximum supported (8388593)}} + __attribute__((address_space(I))) int *bounds; // expected-error {{address space is larger than the maximum supported (8388590)}} } template @@ -101,7 +101,7 @@ car<1, 2, 3>(); // expected-note {{in instantiation of function template specialization 'car<1, 2, 3>' requested here}} HasASTemplateFields<1> HASTF; neg<-1>(); // expected-note {{in instantiation of function template specialization 'neg<-1>' requested here}} - correct<0x7FFFF1>(); + correct<0x7FFFED>(); tooBig<8388650>(); // expected-note {{in instantiation of function template specialization 'tooBig<8388650>' requested here}} __attribute__((address_space(1))) char *x; diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -222,8 +222,9 @@ Itanium, Cygnus, CoreCLR, + SYCLDevice, Simulator, // Simulator variants of other systems, e.g., Apple's iOS - MacABI, // Mac Catalyst variant of Apple's iOS deployment target. + MacABI, // Mac Catalyst variant of Apple's iOS deployment target. LastEnvironmentType = MacABI }; enum ObjectFormatType { @@ -497,6 +498,10 @@ isMacCatalystEnvironment())); } + bool isSYCLDeviceEnvironment() const { + return getEnvironment() == Triple::SYCLDevice; + } + bool isOSNetBSD() const { return getOS() == Triple::NetBSD; } diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp --- a/llvm/lib/Support/Triple.cpp +++ b/llvm/lib/Support/Triple.cpp @@ -247,6 +247,8 @@ case Musl: return "musl"; case MuslEABI: return "musleabi"; case MuslEABIHF: return "musleabihf"; + case SYCLDevice: + return "sycldevice"; case Simulator: return "simulator"; } @@ -554,6 +556,7 @@ .StartsWith("itanium", Triple::Itanium) .StartsWith("cygnus", Triple::Cygnus) .StartsWith("coreclr", Triple::CoreCLR) + .StartsWith("sycldevice", Triple::SYCLDevice) .StartsWith("simulator", Triple::Simulator) .StartsWith("macabi", Triple::MacABI) .Default(Triple::UnknownEnvironment);