diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -494,7 +494,12 @@ /// Returns true if the address space in these qualifiers is equal to or /// a superset of the address space in the argument qualifiers. bool isAddressSpaceSupersetOf(Qualifiers other) const { - return isAddressSpaceSupersetOf(getAddressSpace(), other.getAddressSpace()); + return isAddressSpaceSupersetOf(getAddressSpace(), + other.getAddressSpace()) || + (!hasAddressSpace() && + (other.getAddressSpace() == LangAS::sycl_private || + other.getAddressSpace() == LangAS::sycl_local || + other.getAddressSpace() == LangAS::sycl_global)); } /// Determines if these qualifiers compatibly include another set. diff --git a/clang/include/clang/Basic/AddressSpaces.h b/clang/include/clang/Basic/AddressSpaces.h --- a/clang/include/clang/Basic/AddressSpaces.h +++ b/clang/include/clang/Basic/AddressSpaces.h @@ -44,6 +44,11 @@ cuda_constant, cuda_shared, + // SYCL specific address spaces. + sycl_global, + sycl_local, + sycl_private, + // Pointer size and extension address spaces. ptr32_sptr, ptr32_uptr, diff --git a/clang/include/clang/Sema/ParsedAttr.h b/clang/include/clang/Sema/ParsedAttr.h --- a/clang/include/clang/Sema/ParsedAttr.h +++ b/clang/include/clang/Sema/ParsedAttr.h @@ -598,8 +598,8 @@ /// a Spelling enumeration, the value UINT_MAX is returned. unsigned getSemanticSpelling() const; - /// If this is an OpenCL addr space attribute returns its representation - /// in LangAS, otherwise returns default addr space. + /// If this is an OpenCL address space attribute returns its representation + /// in LangAS, otherwise returns default address space. LangAS asOpenCLLangAS() const { switch (getParsedKind()) { case ParsedAttr::AT_OpenCLConstantAddressSpace: @@ -621,6 +621,23 @@ } } + // TBD: SYCL-specific ParsedAttr? + /// If this is an OpenCL address space attribute returns its SYCL + /// representation in LangAS, otherwise returns default address space. + LangAS asSYCLLangAS() const { + switch (getKind()) { + case ParsedAttr::AT_OpenCLGlobalAddressSpace: + return LangAS::sycl_global; + case ParsedAttr::AT_OpenCLLocalAddressSpace: + return LangAS::sycl_local; + case ParsedAttr::AT_OpenCLPrivateAddressSpace: + return LangAS::sycl_private; + case ParsedAttr::AT_OpenCLGenericAddressSpace: + default: + return LangAS::Default; + } + } + AttributeCommonInfo::Kind getKind() const { return AttributeCommonInfo::Kind(Info.AttrKind); } diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -930,6 +930,9 @@ 7, // cuda_device 8, // cuda_constant 9, // cuda_shared + 1, // sycl_global + 3, // sycl_local + 0, // sycl_private 10, // ptr32_sptr 11, // ptr32_uptr 12 // ptr64 diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2359,7 +2359,7 @@ if (Context.getASTContext().addressSpaceMapManglingFor(AS)) { // ::= "AS" unsigned TargetAS = Context.getASTContext().getTargetAddressSpace(AS); - if (TargetAS != 0) + if (TargetAS != 0 || (Context.getASTContext().getLangOpts().SYCLIsDevice)) ASString = "AS" + llvm::utostr(TargetAS); } else { switch (AS) { diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -2073,10 +2073,13 @@ case LangAS::Default: return ""; case LangAS::opencl_global: + case LangAS::sycl_global: return "__global"; case LangAS::opencl_local: + case LangAS::sycl_local: return "__local"; case LangAS::opencl_private: + case LangAS::sycl_private: return "__private"; case LangAS::opencl_constant: return "__constant"; diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -573,18 +573,10 @@ return new X86_64TargetInfo(Triple, Opts); } - case llvm::Triple::spir: { - if (Triple.getOS() != llvm::Triple::UnknownOS || - Triple.getEnvironment() != llvm::Triple::UnknownEnvironment) - return nullptr; + case llvm::Triple::spir: return new SPIR32TargetInfo(Triple, Opts); - } - case llvm::Triple::spir64: { - if (Triple.getOS() != llvm::Triple::UnknownOS || - Triple.getEnvironment() != llvm::Triple::UnknownEnvironment) - return nullptr; + case llvm::Triple::spir64: return new SPIR64TargetInfo(Triple, Opts); - } case llvm::Triple::wasm32: if (Triple.getSubArch() != llvm::Triple::NoSubArch || Triple.getVendor() != llvm::Triple::UnknownVendor || diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -51,6 +51,9 @@ Global, // cuda_device Constant, // cuda_constant Local, // cuda_shared + Global, // sycl_global + Local, // sycl_local + Private, // sycl_private Generic, // ptr32_sptr Generic, // ptr32_uptr Generic // ptr64 @@ -68,6 +71,9 @@ Global, // cuda_device Constant, // cuda_constant Local, // cuda_shared + Global, // sycl_global + Local, // sycl_local + Private, // sycl_private Generic, // ptr32_sptr Generic, // ptr32_uptr Generic // ptr64 diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -35,6 +35,9 @@ 1, // cuda_device 4, // cuda_constant 3, // cuda_shared + 1, // sycl_global + 3, // sycl_local + 0, // sycl_private 0, // ptr32_sptr 0, // ptr32_uptr 0 // ptr64 diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -33,6 +33,29 @@ 0, // cuda_device 0, // cuda_constant 0, // cuda_shared + 1, // sycl_global + 3, // sycl_local + 0, // sycl_private + 0, // ptr32_sptr + 0, // ptr32_uptr + 0 // ptr64 +}; + +static const unsigned SYCLAddrSpaceMap[] = { + 4, // Default + 1, // opencl_global + 3, // opencl_local + 2, // opencl_constant + 0, // opencl_private + 4, // opencl_generic + 5, // opencl_global_device + 6, // opencl_global_host + 0, // cuda_device + 0, // cuda_constant + 0, // cuda_shared + 1, // sycl_global + 3, // sycl_local + 0, // sycl_private 0, // ptr32_sptr 0, // ptr32_uptr 0 // ptr64 @@ -42,14 +65,14 @@ public: SPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &) : TargetInfo(Triple) { - assert(getTriple().getOS() == llvm::Triple::UnknownOS && - "SPIR target must use unknown OS"); - assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment && - "SPIR target must use unknown environment type"); TLSSupported = false; VLASupported = false; LongWidth = LongAlign = 64; - AddrSpaceMap = &SPIRAddrSpaceMap; + if (Triple.getEnvironment() == llvm::Triple::SYCLDevice) { + AddrSpaceMap = &SYCLAddrSpaceMap; + } else { + AddrSpaceMap = &SPIRAddrSpaceMap; + } UseAddrSpaceMapMangling = true; HasLegalHalfType = true; HasFloat16 = true; @@ -97,6 +120,16 @@ return CC_SpirFunction; } + llvm::Optional getConstantAddressSpace() const override { + // If we assign "opencl_constant" address space the following code becomes + // illegal, because it can't be casted to any other address space: + // + // const char *getLiteral() n{ + // return "AB"; + // } + return LangAS::sycl_global; + } + void setSupportedOpenCLOpts() override { // Assume all OpenCL extensions and optional core features are supported // for SPIR since it is a generic target. @@ -105,6 +138,7 @@ bool hasExtIntType() const override { return true; } }; + class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo { public: SPIR32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) diff --git a/clang/lib/Basic/Targets/TCE.h b/clang/lib/Basic/Targets/TCE.h --- a/clang/lib/Basic/Targets/TCE.h +++ b/clang/lib/Basic/Targets/TCE.h @@ -42,6 +42,9 @@ 0, // cuda_device 0, // cuda_constant 0, // cuda_shared + 3, // sycl_global + 4, // sycl_local + 0, // sycl_private 0, // ptr32_sptr 0, // ptr32_uptr 0, // ptr64 diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -35,6 +35,9 @@ 0, // cuda_device 0, // cuda_constant 0, // cuda_shared + 0, // sycl_global + 0, // sycl_local + 0, // sycl_private 270, // ptr32_sptr 271, // ptr32_uptr 272 // ptr64 diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9984,7 +9984,17 @@ public: SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} + + LangAS getASTAllocaAddressSpace() const override { + return getLangASFromTargetAS( + getABIInfo().getDataLayout().getAllocaAddrSpace()); + } + + LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const override; + unsigned getOpenCLKernelCallingConv() const override; + bool shouldEmitStaticExternCAliases() const override; }; } // End anonymous namespace. @@ -10002,6 +10012,32 @@ return llvm::CallingConv::SPIR_KERNEL; } +bool SPIRTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { + return false; +} + +LangAS SPIRTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const { + assert(!CGM.getLangOpts().OpenCL && + !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && + "Address space agnostic languages only"); + LangAS DefaultGlobalAS = getLangASFromTargetAS( + CGM.getContext().getTargetAddressSpace(LangAS::sycl_global)); + if (!D) + return DefaultGlobalAS; + + LangAS AddrSpace = D->getType().getAddressSpace(); + assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace)); + if (AddrSpace != LangAS::Default) + return AddrSpace; + + if (CGM.isTypeConstant(D->getType(), false)) { + if (auto ConstAS = CGM.getTarget().getConstantAddressSpace()) + return ConstAS.getValue(); + } + return DefaultGlobalAS; +} + static bool appendType(SmallStringEnc &Enc, QualType QType, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -6345,6 +6345,7 @@ llvm::APSInt max(addrSpace.getBitWidth()); max = Qualifiers::MaxAddressSpace - (unsigned)LangAS::FirstTargetAddressSpace; + if (addrSpace > max) { S.Diag(AttrLoc, diag::err_attribute_address_space_too_high) << (unsigned)max.getZExtValue() << AddrSpace->getSourceRange(); @@ -6478,7 +6479,9 @@ Attr.setInvalid(); } else { // The keyword-based type attributes imply which address space to use. - ASIdx = Attr.asOpenCLLangAS(); + ASIdx = S.getLangOpts().SYCLIsDevice ? Attr.asSYCLLangAS() + : Attr.asOpenCLLangAS(); + if (ASIdx == LangAS::Default) llvm_unreachable("Invalid address space"); diff --git a/clang/test/CodeGenSYCL/address-space-cond-op.cpp b/clang/test/CodeGenSYCL/address-space-cond-op.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenSYCL/address-space-cond-op.cpp @@ -0,0 +1,49 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -x c++ -triple spir64-unknown-linux-sycldevice -disable-llvm-passes -fsycl -fsycl-is-device -emit-llvm %s -o - | FileCheck %s + +struct S { + unsigned short x; +}; + +// CHECK-LABEL: @_Z3foobR1SS_( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[COND_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[COND_ADDR_ASCAST:%.*]] = addrspacecast i8* [[COND_ADDR]] to i8 addrspace(4)* +// CHECK-NEXT: [[LHS_ADDR:%.*]] = alloca [[STRUCT_S:%.*]] addrspace(4)*, align 8 +// CHECK-NEXT: [[LHS_ADDR_ASCAST:%.*]] = addrspacecast [[STRUCT_S]] addrspace(4)** [[LHS_ADDR]] to [[STRUCT_S]] addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[COND:%.*]] to i8 +// CHECK-NEXT: store i8 [[FROMBOOL]], i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1 +// CHECK-NEXT: store [[STRUCT_S]] addrspace(4)* [[LHS:%.*]], [[STRUCT_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[RHS_ASCAST:%.*]] = addrspacecast %struct.S* [[RHS:%.*]] to [[STRUCT_S]] addrspace(4)* +// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1 +// CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP1:%.*]] = load [[STRUCT_S]] addrspace(4)*, [[STRUCT_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND_LVALUE:%.*]] = phi [[STRUCT_S]] addrspace(4)* [ [[TMP1]], [[COND_TRUE]] ], [ [[RHS_ASCAST]], [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[STRUCT_S]] addrspace(4)* [[AGG_RESULT:%.*]] to i8 addrspace(4)* +// CHECK-NEXT: [[TMP3:%.*]] = bitcast [[STRUCT_S]] addrspace(4)* [[COND_LVALUE]] to i8 addrspace(4)* +// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 2 [[TMP2]], i8 addrspace(4)* align 2 [[TMP3]], i64 2, i1 false) +// CHECK-NEXT: ret void +// +S foo(bool cond, S &lhs, S rhs) { + S val = cond ? lhs : rhs; + return val; +} + +template +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + kernel([]() { + S lhs, rhs; + foo(true, lhs, rhs); + }); + return 0; +} diff --git a/clang/test/CodeGenSYCL/address-space-of-returns.cpp b/clang/test/CodeGenSYCL/address-space-of-returns.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenSYCL/address-space-of-returns.cpp @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -triple spir64-unknown-linux-sycldevice -fsycl -fsycl-is-device -disable-llvm-passes -emit-llvm -x c++ %s -o - | FileCheck %s + +struct A { + int B[42]; +}; + +const char *ret_char() { + return "N"; +} +// CHECK: ret i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* addrspacecast ([2 x i8] addrspace(1)* @.str to [2 x i8] addrspace(4)*), i64 0, i64 0) + +const char *ret_arr() { + static const char Arr[42] = {0}; + return Arr; +} +// CHECK: ret i8 addrspace(4)* getelementptr inbounds ([42 x i8], [42 x i8] addrspace(4)* addrspacecast ([42 x i8] addrspace(1)* @{{.*}}ret_arr{{.*}}Arr to [42 x i8] addrspace(4)*), i64 0, i64 0) + +const char &ret_ref() { + static const char a = 'A'; + return a; +} +// CHECK: ret i8 addrspace(4)* addrspacecast (i8 addrspace(1)* @{{.*}}ret_ref{{.*}} to i8 addrspace(4)*) + +A ret_agg() { + A a; + return a; +} +// CHECK: define spir_func void @{{.*}}ret_agg{{.*}}(%struct.A addrspace(4)* noalias sret(%struct.A) align 4 %agg.result) + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} + +int main() { + kernel_single_task([]() { + ret_char(); + ret_arr(); + ret_ref(); + ret_agg(); + }); + return 0; +} diff --git a/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp b/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp @@ -0,0 +1,131 @@ +// RUN: %clang_cc1 -triple spir64-unknown-linux-sycldevice -fsycl -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +void bar(int &Data) {} +// CHECK-DAG: define spir_func void @[[RAW_REF:[a-zA-Z0-9_]+]](i32 addrspace(4)* align 4 dereferenceable(4) % +void bar2(int &Data) {} +// CHECK-DAG: define spir_func void @[[RAW_REF2:[a-zA-Z0-9_]+]](i32 addrspace(4)* align 4 dereferenceable(4) % +void bar(__attribute__((opencl_local)) int &Data) {} +// CHECK-DAG: define spir_func void [[LOC_REF:@[a-zA-Z0-9_]+]](i32 addrspace(3)* align 4 dereferenceable(4) % +void foo(int *Data) {} +// CHECK-DAG: define spir_func void @[[RAW_PTR:[a-zA-Z0-9_]+]](i32 addrspace(4)* % +void foo2(int *Data) {} +// CHECK-DAG: define spir_func void @[[RAW_PTR2:[a-zA-Z0-9_]+]](i32 addrspace(4)* % +void foo(__attribute__((opencl_local)) int *Data) {} +// CHECK-DAG: define spir_func void [[LOC_PTR:@[a-zA-Z0-9_]+]](i32 addrspace(3)* % + +template +void tmpl(T t) {} +// See Check Lines below. + +void usages() { + // CHECK-DAG: [[GLOB:%[a-zA-Z0-9]+]] = alloca i32 addrspace(1)* + // CHECK-DAG: [[GLOB]].ascast = addrspacecast i32 addrspace(1)** [[GLOB]] to i32 addrspace(1)* addrspace(4)* + __attribute__((opencl_global)) int *GLOB; + // CHECK-DAG: [[LOC:%[a-zA-Z0-9]+]] = alloca i32 addrspace(3)* + // CHECK-DAG: [[LOC]].ascast = addrspacecast i32 addrspace(3)** [[LOC]] to i32 addrspace(3)* addrspace(4)* + __attribute__((opencl_local)) int *LOC; + // CHECK-DAG: [[NoAS:%[a-zA-Z0-9]+]] = alloca i32 addrspace(4)* + // CHECK-DAG: [[NoAS]].ascast = addrspacecast i32 addrspace(4)** [[NoAS]] to i32 addrspace(4)* addrspace(4)* + int *NoAS; + // CHECK-DAG: [[PRIV:%[a-zA-Z0-9]+]] = alloca i32* + // CHECK-DAG: [[PRIV]].ascast = addrspacecast i32** [[PRIV]] to i32* addrspace(4)* + __attribute__((opencl_private)) int *PRIV; + + bar(*GLOB); + // CHECK-DAG: [[GLOB_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[GLOB]].ascast + // CHECK-DAG: [[GLOB_CAST:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(1)* [[GLOB_LOAD]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* align 4 dereferenceable(4) [[GLOB_CAST]]) + bar2(*GLOB); + // CHECK-DAG: [[GLOB_LOAD2:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[GLOB]].ascast + // CHECK-DAG: [[GLOB_CAST2:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(1)* [[GLOB_LOAD2]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_REF2]](i32 addrspace(4)* align 4 dereferenceable(4) [[GLOB_CAST2]]) + + bar(*LOC); + // CHECK-DAG: [[LOC_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LOC]].ascast + // CHECK-DAG: call spir_func void [[LOC_REF]](i32 addrspace(3)* align 4 dereferenceable(4) [[LOC_LOAD]]) + bar2(*LOC); + // CHECK-DAG: [[LOC_LOAD2:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LOC]].ascast + // CHECK-DAG: [[LOC_CAST2:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(3)* [[LOC_LOAD2]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_REF2]](i32 addrspace(4)* align 4 dereferenceable(4) [[LOC_CAST2]]) + + bar(*NoAS); + // CHECK-DAG: [[NoAS_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[NoAS]].ascast + // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* align 4 dereferenceable(4) [[NoAS_LOAD]]) + bar2(*NoAS); + // CHECK-DAG: [[NoAS_LOAD2:%[a-zA-Z0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[NoAS]].ascast + // CHECK-DAG: call spir_func void @[[RAW_REF2]](i32 addrspace(4)* align 4 dereferenceable(4) [[NoAS_LOAD2]]) + + foo(GLOB); + // CHECK-DAG: [[GLOB_LOAD3:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[GLOB]].ascast + // CHECK-DAG: [[GLOB_CAST3:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(1)* [[GLOB_LOAD3]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_PTR]](i32 addrspace(4)* [[GLOB_CAST3]]) + foo2(GLOB); + // CHECK-DAG: [[GLOB_LOAD4:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[GLOB]].ascast + // CHECK-DAG: [[GLOB_CAST4:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(1)* [[GLOB_LOAD4]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_PTR2]](i32 addrspace(4)* [[GLOB_CAST4]]) + foo(LOC); + // CHECK-DAG: [[LOC_LOAD3:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LOC]].ascast + // CHECK-DAG: call spir_func void [[LOC_PTR]](i32 addrspace(3)* [[LOC_LOAD3]]) + foo2(LOC); + // CHECK-DAG: [[LOC_LOAD4:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LOC]].ascast + // CHECK-DAG: [[LOC_CAST4:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(3)* [[LOC_LOAD4]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_PTR2]](i32 addrspace(4)* [[LOC_CAST4]]) + foo(NoAS); + // CHECK-DAG: [[NoAS_LOAD3:%[a-zA-Z0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[NoAS]].ascast + // CHECK-DAG: call spir_func void @[[RAW_PTR]](i32 addrspace(4)* [[NoAS_LOAD3]]) + foo2(NoAS); + // CHECK-DAG: [[NoAS_LOAD4:%[a-zA-Z0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[NoAS]].ascast + // CHECK-DAG: call spir_func void @[[RAW_PTR2]](i32 addrspace(4)* [[NoAS_LOAD4]]) + + // Ensure that we still get 3 different template instantiations. + tmpl(GLOB); + // CHECK-DAG: [[GLOB_LOAD4:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[GLOB]].ascast + // CHECK-DAG: call spir_func void [[GLOB_TMPL:@[a-zA-Z0-9_]+]](i32 addrspace(1)* [[GLOB_LOAD4]]) + tmpl(LOC); + // CHECK-DAG: [[LOC_LOAD5:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LOC]].ascast + // CHECK-DAG: call spir_func void [[LOC_TMPL:@[a-zA-Z0-9_]+]](i32 addrspace(3)* [[LOC_LOAD5]]) + tmpl(PRIV); + // CHECK-DAG: [[PRIV_LOAD5:%[a-zA-Z0-9]+]] = load i32*, i32* addrspace(4)* [[PRIV]].ascast + // CHECK-DAG: call spir_func void [[PRIV_TMPL:@[a-zA-Z0-9_]+]](i32* [[PRIV_LOAD5]]) + tmpl(NoAS); + // CHECK-DAG: [[NoAS_LOAD5:%[a-zA-Z0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* [[NoAS]].ascast + // CHECK-DAG: call spir_func void [[GEN_TMPL:@[a-zA-Z0-9_]+]](i32 addrspace(4)* [[NoAS_LOAD5]]) +} + +// CHECK-DAG: define linkonce_odr spir_func void [[GLOB_TMPL]](i32 addrspace(1)* % +// CHECK-DAG: define linkonce_odr spir_func void [[LOC_TMPL]](i32 addrspace(3)* % +// CHECK-DAG: define linkonce_odr spir_func void [[PRIV_TMPL]](i32* % +// CHECK-DAG: define linkonce_odr spir_func void [[GEN_TMPL]](i32 addrspace(4)* % + +void usages2() { + __attribute__((opencl_private)) int *PRIV; + // CHECK-DAG: [[PRIV:%[a-zA-Z0-9]+]] = alloca i32* + // CHECK-DAG: [[PRIV]].ascast = addrspacecast i32** [[PRIV]] to i32* addrspace(4)* + __attribute__((opencl_global)) int *GLOB; + // CHECK-DAG: [[GLOB:%[a-zA-Z0-9]+]] = alloca i32 addrspace(1)* + // CHECK-DAG: [[GLOB]].ascast = addrspacecast i32 addrspace(1)** [[GLOB]] to i32 addrspace(1)* addrspace(4)* + __attribute__((opencl_local)) int *LOCAL; + // CHECK-DAG: [[LOCAL:%[a-zA-Z0-9]+]] = alloca i32 addrspace(3)* + // CHECK-DAG: [[LOCAL]].ascast = addrspacecast i32 addrspace(3)** [[LOCAL]] to i32 addrspace(3)* addrspace(4)* + + bar(*PRIV); + // CHECK-DAG: [[PRIV_LOAD:%[a-zA-Z0-9]+]] = load i32*, i32* addrspace(4)* [[PRIV]].ascast + // CHECK-DAG: [[PRIV_ASCAST:%[a-zA-Z0-9]+]] = addrspacecast i32* [[PRIV_LOAD]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* align 4 dereferenceable(4) [[PRIV_ASCAST]]) + bar(*GLOB); + // CHECK-DAG: [[GLOB_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* [[GLOB]].ascast + // CHECK-DAG: [[GLOB_CAST:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(1)* [[GLOB_LOAD]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_REF]](i32 addrspace(4)* align 4 dereferenceable(4) [[GLOB_CAST]]) + bar2(*LOCAL); + // CHECK-DAG: [[LOCAL_LOAD:%[a-zA-Z0-9]+]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LOCAL]].ascast + // CHECK-DAG: [[LOCAL_CAST:%[a-zA-Z0-9]+]] = addrspacecast i32 addrspace(3)* [[LOCAL_LOAD]] to i32 addrspace(4)* + // CHECK-DAG: call spir_func void @[[RAW_REF2]](i32 addrspace(4)* align 4 dereferenceable(4) [[LOCAL_CAST]]) +} + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} +int main() { + kernel_single_task([]() { usages();usages2(); }); + return 0; +} diff --git a/clang/test/CodeGenSYCL/address-spaces.cpp b/clang/test/CodeGenSYCL/address-spaces.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenSYCL/address-spaces.cpp @@ -0,0 +1,109 @@ +// RUN: %clang_cc1 -triple spir64-unknown-linux-sycldevice -fsycl -fsycl-is-device -disable-llvm-passes -emit-llvm -x c++ %s -o - | FileCheck %s + +struct Padding { + int i, j; +}; + +struct HasX { + int x; +}; + +struct Y : Padding, HasX {}; + +void bar(HasX &hx); + +// CHECK: @_ZZ4testvE3foo = internal addrspace(1) constant i32 66, align 4 +// CHECK: @_ZZ4testvE4bars = internal addrspace(1) constant <{ [21 x i32], [235 x i32] }> <{ [21 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20], [235 x i32] zeroinitializer }>, align 4 +// CHECK: @[[STR:[.a-zA-Z0-9_]+]] = private unnamed_addr addrspace(1) constant [14 x i8] c"Hello, world!\00", align 1 + +// CHECK-LABEL: @_Z3baz{{.*}} +void baz(Y &y) { + // CHECK: %[[FIRST:[a-zA-Z0-9]+]] = bitcast %struct.Y addrspace(4)* %{{.*}} to i8 addrspace(4)* + // CHECK: %[[OFFSET:[a-zA-Z0-9]+]].ptr = getelementptr inbounds i8, i8 addrspace(4)* %[[FIRST]], i64 8 + // CHECK: %[[SECOND:[a-zA-Z0-9]+]] = bitcast i8 addrspace(4)* %[[OFFSET]].ptr to %struct.HasX addrspace(4)* + // CHECK: call spir_func void @{{.*}}bar{{.*}}(%struct.HasX addrspace(4)* align 4 dereferenceable(4) %[[SECOND]]) + bar(y); +} + +// CHECK-LABEL: @_Z4testv +void test() { + static const int foo = 0x42; + + // CHECK: %i.ascast = addrspacecast i32* %i to i32 addrspace(4)* + // CHECK: %[[ARR:[a-zA-Z0-9]+]] = alloca [42 x i32] + // CHECK: %[[ARR]].ascast = addrspacecast [42 x i32]* %[[ARR]] to [42 x i32] addrspace(4)* + + int i = 0; + int *pptr = &i; + // CHECK: store i32 addrspace(4)* %i.ascast, i32 addrspace(4)* addrspace(4)* %pptr.ascast + bool is_i_ptr = (pptr == &i); + // CHECK: %[[VALPPTR:[0-9]+]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %pptr.ascast + // CHECK: %cmp{{[0-9]*}} = icmp eq i32 addrspace(4)* %[[VALPPTR]], %i.ascast + *pptr = foo; + + int var23 = 23; + char *cp = (char *)&var23; + *cp = 41; + // CHECK: store i32 23, i32 addrspace(4)* %[[VAR:[a-zA-Z0-9.]+]] + // CHECK: [[VARCAST:%.*]] = bitcast i32 addrspace(4)* %[[VAR]] to i8 addrspace(4)* + // CHECK: store i8 addrspace(4)* [[VARCAST]], i8 addrspace(4)* addrspace(4)* %{{.*}} + + int arr[42]; + char *cpp = (char *)arr; + *cpp = 43; + // CHECK: [[ARRDECAY:%.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* %[[ARR]].ascast, i64 0, i64 0 + // CHECK: [[ARRCAST:%.*]] = bitcast i32 addrspace(4)* [[ARRDECAY]] to i8 addrspace(4)* + // CHECK: store i8 addrspace(4)* [[ARRCAST]], i8 addrspace(4)* addrspace(4)* %{{.*}} + + int *aptr = arr + 10; + if (aptr < arr + sizeof(arr)) + *aptr = 44; + // CHECK: %[[VALAPTR:.*]] = load i32 addrspace(4)*, i32 addrspace(4)* addrspace(4)* %aptr.ascast + // CHECK: %[[ARRDCY2:.*]] = getelementptr inbounds [42 x i32], [42 x i32] addrspace(4)* %[[ARR]].ascast, i64 0, i64 0 + // CHECK: %[[ADDPTR:.*]] = getelementptr inbounds i32, i32 addrspace(4)* %[[ARRDCY2]], i64 168 + // CHECK: %cmp{{[0-9]+}} = icmp ult i32 addrspace(4)* %[[VALAPTR]], %[[ADDPTR]] + + const char *str = "Hello, world!"; + // CHECK: store i8 addrspace(4)* getelementptr inbounds ([14 x i8], [14 x i8] addrspace(4)* addrspacecast ([14 x i8] addrspace(1)* @[[STR]] to [14 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* addrspace(4)* %[[STRVAL:[a-zA-Z0-9]+]].ascast, align 8 + + i = str[0]; + + const char *phi_str = i > 2 ? str : "Another hello world!"; + (void)phi_str; + // CHECK: %[[COND:[a-zA-Z0-9]+]] = icmp sgt i32 %{{.*}}, 2 + // CHECK: br i1 %[[COND]], label %[[CONDTRUE:[.a-zA-Z0-9]+]], label %[[CONDFALSE:[.a-zA-Z0-9]+]] + + // CHECK: [[CONDTRUE]]: + // CHECK-NEXT: %[[VALTRUE:[a-zA-Z0-9]+]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %str.ascast + // CHECK-NEXT: br label %[[CONDEND:[.a-zA-Z0-9]+]] + + // CHECK: [[CONDFALSE]]: + + // CHECK: [[CONDEND]]: + // CHECK-NEXT: phi i8 addrspace(4)* [ %[[VALTRUE]], %[[CONDTRUE]] ], [ getelementptr inbounds ([21 x i8], [21 x i8] addrspace(4)* addrspacecast ([21 x i8] addrspace(1)* @{{.*}} to [21 x i8] addrspace(4)*), i64 0, i64 0), %[[CONDFALSE]] ] + + const char *select_null = i > 2 ? "Yet another Hello world" : nullptr; + (void)select_null; + // CHECK: select i1 %{{.*}}, i8 addrspace(4)* getelementptr inbounds ([24 x i8], [24 x i8] addrspace(4)* addrspacecast ([24 x i8] addrspace(1)* @{{.*}} to [24 x i8] addrspace(4)*), i64 0, i64 0) + + const char *select_str_trivial1 = true ? str : "Another hello world!"; + (void)select_str_trivial1; + // CHECK: %[[TRIVIALTRUE:[a-zA-Z0-9]+]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[STRVAL]] + // CHECK: store i8 addrspace(4)* %[[TRIVIALTRUE]], i8 addrspace(4)* addrspace(4)* %{{.*}}, align 8 + + const char *select_str_trivial2 = false ? str : "Another hello world!"; + (void)select_str_trivial2; + // CHECK: store i8 addrspace(4)* getelementptr inbounds ([21 x i8], [21 x i8] addrspace(4)* addrspacecast ([21 x i8] addrspace(1)* @{{.*}} to [21 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* addrspace(4)* %{{.*}} + Y yy; + baz(yy); +} + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} + +int main() { + kernel_single_task([]() { test(); }); + return 0; +} diff --git a/clang/test/SemaSYCL/address-space-parameter-conversions.cpp b/clang/test/SemaSYCL/address-space-parameter-conversions.cpp new file mode 100644 --- /dev/null +++ b/clang/test/SemaSYCL/address-space-parameter-conversions.cpp @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -verify -fsyntax-only -x c++ %s + +void bar(int &Data) {} +void bar2(int &Data) {} +void bar(__attribute__((opencl_private)) int &Data) {} +void foo(int *Data) {} +void foo2(int *Data) {} +void foo(__attribute__((opencl_private)) int *Data) {} + +template +void tmpl(T *t) {} + +void usages() { + __attribute__((opencl_global)) int *GLOB; + __attribute__((opencl_private)) int *PRIV; + __attribute__((opencl_local)) int *LOC; + int *NoAS; + + bar(*GLOB); + bar2(*GLOB); + + bar(*PRIV); + bar2(*PRIV); + + bar(*NoAS); + bar2(*NoAS); + + bar(*LOC); + bar2(*LOC); + + foo(GLOB); + foo2(GLOB); + foo(PRIV); + foo2(PRIV); + foo(NoAS); + foo2(NoAS); + foo(LOC); + foo2(LOC); + + tmpl(GLOB); + tmpl(PRIV); + tmpl(NoAS); + tmpl(LOC); + + (void)static_cast(GLOB); + (void)static_cast(GLOB); + // FIXME: determine if we can warn on the below conversions. + int *i = GLOB; + void *v = GLOB; + (void)i; + (void)v; + + __generic int *IsGeneric; // expected-error{{unknown type name '__generic'}} + __private int *IsPrivate; // expected-error{{unknown type name '__private'}} + __global int *IsGlobal; // expected-error{{unknown type name '__global'}} + __local int *IsLocal; // expected-error{{unknown type name '__local'}} +} diff --git a/clang/test/SemaTemplate/address_space-dependent.cpp b/clang/test/SemaTemplate/address_space-dependent.cpp --- a/clang/test/SemaTemplate/address_space-dependent.cpp +++ b/clang/test/SemaTemplate/address_space-dependent.cpp @@ -43,7 +43,7 @@ template void tooBig() { - __attribute__((address_space(I))) int *bounds; // expected-error {{address space is larger than the maximum supported (8388593)}} + __attribute__((address_space(I))) int *bounds; // expected-error {{address space is larger than the maximum supported (8388590)}} } template @@ -101,7 +101,7 @@ car<1, 2, 3>(); // expected-note {{in instantiation of function template specialization 'car<1, 2, 3>' requested here}} HasASTemplateFields<1> HASTF; neg<-1>(); // expected-note {{in instantiation of function template specialization 'neg<-1>' requested here}} - correct<0x7FFFF1>(); + correct<0x7FFFED>(); tooBig<8388650>(); // expected-note {{in instantiation of function template specialization 'tooBig<8388650>' requested here}} __attribute__((address_space(1))) char *x; diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -217,8 +217,9 @@ Itanium, Cygnus, CoreCLR, + SYCLDevice, Simulator, // Simulator variants of other systems, e.g., Apple's iOS - MacABI, // Mac Catalyst variant of Apple's iOS deployment target. + MacABI, // Mac Catalyst variant of Apple's iOS deployment target. LastEnvironmentType = MacABI }; enum ObjectFormatType { @@ -492,6 +493,10 @@ isMacCatalystEnvironment())); } + bool isSYCLDeviceEnvironment() const { + return getEnvironment() == Triple::SYCLDevice; + } + bool isOSNetBSD() const { return getOS() == Triple::NetBSD; } diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp --- a/llvm/lib/Support/Triple.cpp +++ b/llvm/lib/Support/Triple.cpp @@ -244,6 +244,8 @@ case Musl: return "musl"; case MuslEABI: return "musleabi"; case MuslEABIHF: return "musleabihf"; + case SYCLDevice: + return "sycldevice"; case Simulator: return "simulator"; } @@ -529,26 +531,27 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { return StringSwitch(EnvironmentName) - .StartsWith("eabihf", Triple::EABIHF) - .StartsWith("eabi", Triple::EABI) - .StartsWith("gnuabin32", Triple::GNUABIN32) - .StartsWith("gnuabi64", Triple::GNUABI64) - .StartsWith("gnueabihf", Triple::GNUEABIHF) - .StartsWith("gnueabi", Triple::GNUEABI) - .StartsWith("gnux32", Triple::GNUX32) - .StartsWith("code16", Triple::CODE16) - .StartsWith("gnu", Triple::GNU) - .StartsWith("android", Triple::Android) - .StartsWith("musleabihf", Triple::MuslEABIHF) - .StartsWith("musleabi", Triple::MuslEABI) - .StartsWith("musl", Triple::Musl) - .StartsWith("msvc", Triple::MSVC) - .StartsWith("itanium", Triple::Itanium) - .StartsWith("cygnus", Triple::Cygnus) - .StartsWith("coreclr", Triple::CoreCLR) - .StartsWith("simulator", Triple::Simulator) - .StartsWith("macabi", Triple::MacABI) - .Default(Triple::UnknownEnvironment); + .StartsWith("eabihf", Triple::EABIHF) + .StartsWith("eabi", Triple::EABI) + .StartsWith("gnuabin32", Triple::GNUABIN32) + .StartsWith("gnuabi64", Triple::GNUABI64) + .StartsWith("gnueabihf", Triple::GNUEABIHF) + .StartsWith("gnueabi", Triple::GNUEABI) + .StartsWith("gnux32", Triple::GNUX32) + .StartsWith("code16", Triple::CODE16) + .StartsWith("gnu", Triple::GNU) + .StartsWith("android", Triple::Android) + .StartsWith("musleabihf", Triple::MuslEABIHF) + .StartsWith("musleabi", Triple::MuslEABI) + .StartsWith("musl", Triple::Musl) + .StartsWith("msvc", Triple::MSVC) + .StartsWith("itanium", Triple::Itanium) + .StartsWith("cygnus", Triple::Cygnus) + .StartsWith("coreclr", Triple::CoreCLR) + .StartsWith("sycldevice", Triple::SYCLDevice) + .StartsWith("simulator", Triple::Simulator) + .StartsWith("macabi", Triple::MacABI) + .Default(Triple::UnknownEnvironment); } static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {