Index: include/clang/AST/ASTContext.h =================================================================== --- include/clang/AST/ASTContext.h +++ include/clang/AST/ASTContext.h @@ -434,6 +434,7 @@ friend class CXXRecordDecl; const TargetInfo *Target; + const TargetInfo *AuxTarget; clang::PrintingPolicy PrintingPolicy; public: @@ -520,7 +521,8 @@ } const TargetInfo &getTargetInfo() const { return *Target; } - + const TargetInfo *getAuxTargetInfo() const { return AuxTarget; } + /// getIntTypeForBitwidth - /// sets integer QualTy according to specified details: /// bitwidth, signed/unsigned. @@ -2403,9 +2405,10 @@ /// This routine may only be invoked once for a given ASTContext object. /// It is normally invoked after ASTContext construction. /// - /// \param Target The target - void InitBuiltinTypes(const TargetInfo &Target); - + /// \param Target The target + void InitBuiltinTypes(const TargetInfo &Target, + const TargetInfo *AuxTarget = nullptr); + private: void InitBuiltinType(CanQualType &R, BuiltinType::Kind K); Index: include/clang/Basic/Builtins.h =================================================================== --- include/clang/Basic/Builtins.h +++ include/clang/Basic/Builtins.h @@ -56,15 +56,23 @@ /// \brief Holds information about both target-independent and /// target-specific builtins, allowing easy queries by clients. +/// +/// Builtins from an optional auxiliary target are stored in +/// AuxTSRecords. Their IDs are shifted up by NumTSRecords and need to +/// be translated back with getAuxBuiltinID() before use. class Context { const Info *TSRecords; + const Info *AuxTSRecords; unsigned NumTSRecords; + unsigned NumAuxTSRecords; + public: Context(); /// \brief Perform target-specific initialization - void initializeTarget(const TargetInfo &Target); - + /// \param AuxTarget Target info to incorporate builtins from. May be nullptr. + void InitializeTarget(const TargetInfo &Target, const TargetInfo *AuxTarget); + /// \brief Mark the identifiers for all the builtins with their /// appropriate builtin ID # and mark any non-portable builtin identifiers as /// such. @@ -176,6 +184,15 @@ return getRecord(ID).Features; } + /// \brief Return true if builtin ID belongs to AuxTarget. + bool isAuxBuiltinID(unsigned ID) const { + return ID >= (Builtin::FirstTSBuiltin + NumTSRecords); + } + + /// Return real buitin ID (i.e. ID it would have furing compilation + /// for AuxTarget). + unsigned getAuxBuiltinID(unsigned ID) const { return ID - NumTSRecords; } + private: const Info &getRecord(unsigned ID) const; Index: include/clang/Driver/CC1Options.td =================================================================== --- include/clang/Driver/CC1Options.td +++ include/clang/Driver/CC1Options.td @@ -325,6 +325,8 @@ def ast_merge : Separate<["-"], "ast-merge">, MetaVarName<"">, HelpText<"Merge the given AST file into the translation unit being compiled.">; +def aux_triple : Separate<["-"], "aux-triple">, + HelpText<"Auxiliary target triple.">; def code_completion_at : Separate<["-"], "code-completion-at">, MetaVarName<"::">, HelpText<"Dump code-completion information at a location">; Index: include/clang/Frontend/CompilerInstance.h =================================================================== --- include/clang/Frontend/CompilerInstance.h +++ include/clang/Frontend/CompilerInstance.h @@ -78,6 +78,9 @@ /// The target being compiled for. IntrusiveRefCntPtr Target; + /// Auxiliary Target info. + IntrusiveRefCntPtr AuxTarget; + /// The virtual file system. IntrusiveRefCntPtr VirtualFileSystem; @@ -352,6 +355,15 @@ void setTarget(TargetInfo *Value); /// } + /// @name AuxTarget Info + /// { + + TargetInfo *getAuxTarget() const { return AuxTarget.get(); } + + /// Replace the current AuxTarget. + void setAuxTarget(TargetInfo *Value); + + /// } /// @name Virtual File System /// { Index: include/clang/Frontend/FrontendOptions.h =================================================================== --- include/clang/Frontend/FrontendOptions.h +++ include/clang/Frontend/FrontendOptions.h @@ -256,7 +256,10 @@ /// \brief File name of the file that will provide record layouts /// (in the format produced by -fdump-record-layouts). std::string OverrideRecordLayoutsFile; - + + /// \brief Auxiliary triple for CUDA compilation. + std::string AuxTriple; + public: FrontendOptions() : DisableFree(false), RelocatablePCH(false), ShowHelp(false), Index: include/clang/Lex/Preprocessor.h =================================================================== --- include/clang/Lex/Preprocessor.h +++ include/clang/Lex/Preprocessor.h @@ -98,6 +98,7 @@ DiagnosticsEngine *Diags; LangOptions &LangOpts; const TargetInfo *Target; + const TargetInfo *AuxTarget; FileManager &FileMgr; SourceManager &SourceMgr; std::unique_ptr ScratchBuf; @@ -656,7 +657,10 @@ /// /// \param Target is owned by the caller and must remain valid for the /// lifetime of the preprocessor. - void Initialize(const TargetInfo &Target); + /// \param AuxTarget is owned by the caller and must remain valid for + /// the lifetime of the preprocessor. + void Initialize(const TargetInfo &Target, + const TargetInfo *AuxTarget = nullptr); /// \brief Initialize the preprocessor to parse a model file /// @@ -678,6 +682,7 @@ const LangOptions &getLangOpts() const { return LangOpts; } const TargetInfo &getTargetInfo() const { return *Target; } + const TargetInfo *getAuxTargetInfo() const { return AuxTarget; } FileManager &getFileManager() const { return FileMgr; } SourceManager &getSourceManager() const { return SourceMgr; } HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } Index: lib/AST/ASTContext.cpp =================================================================== --- lib/AST/ASTContext.cpp +++ lib/AST/ASTContext.cpp @@ -742,10 +742,10 @@ FirstLocalImport(), LastLocalImport(), ExternCContext(nullptr), SourceMgr(SM), LangOpts(LOpts), SanitizerBL(new SanitizerBlacklist(LangOpts.SanitizerBlacklistFiles, SM)), - AddrSpaceMap(nullptr), Target(nullptr), PrintingPolicy(LOpts), - Idents(idents), Selectors(sels), BuiltinInfo(builtins), - DeclarationNames(*this), ExternalSource(nullptr), Listener(nullptr), - Comments(SM), CommentsLoaded(false), + AddrSpaceMap(nullptr), Target(nullptr), AuxTarget(nullptr), + PrintingPolicy(LOpts), Idents(idents), Selectors(sels), + BuiltinInfo(builtins), DeclarationNames(*this), ExternalSource(nullptr), + Listener(nullptr), Comments(SM), CommentsLoaded(false), CommentCommandTraits(BumpAlloc, LOpts.CommentOpts), LastSDM(nullptr, 0) { TUDecl = TranslationUnitDecl::Create(*this); } @@ -955,13 +955,15 @@ Types.push_back(Ty); } -void ASTContext::InitBuiltinTypes(const TargetInfo &Target) { +void ASTContext::InitBuiltinTypes(const TargetInfo &Target, + const TargetInfo *AuxTarget) { assert((!this->Target || this->Target == &Target) && "Incorrect target reinitialization"); assert(VoidTy.isNull() && "Context reinitialized?"); this->Target = &Target; - + this->AuxTarget = AuxTarget; + ABI.reset(createCXXABI(Target)); AddrSpaceMap = getAddressSpaceMap(Target, LangOpts); AddrSpaceMapMangling = isAddrSpaceMapManglingEnabled(Target, LangOpts); Index: lib/Basic/Builtins.cpp =================================================================== --- lib/Basic/Builtins.cpp +++ lib/Basic/Builtins.cpp @@ -32,19 +32,27 @@ const Builtin::Info &Builtin::Context::getRecord(unsigned ID) const { if (ID < Builtin::FirstTSBuiltin) return BuiltinInfo[ID]; - assert(ID - Builtin::FirstTSBuiltin < NumTSRecords && "Invalid builtin ID!"); + assert(ID - Builtin::FirstTSBuiltin < (NumTSRecords + NumAuxTSRecords) && + "Invalid builtin ID!"); + if (isAuxBuiltinID(ID)) + return AuxTSRecords[getAuxBuiltinID(ID) - Builtin::FirstTSBuiltin]; return TSRecords[ID - Builtin::FirstTSBuiltin]; } Builtin::Context::Context() { // Get the target specific builtins from the target. TSRecords = nullptr; + AuxTSRecords = nullptr; NumTSRecords = 0; + NumAuxTSRecords = 0; } -void Builtin::Context::initializeTarget(const TargetInfo &Target) { +void Builtin::Context::InitializeTarget(const TargetInfo &Target, + const TargetInfo *AuxTarget) { assert(NumTSRecords == 0 && "Already initialized target?"); Target.getTargetBuiltins(TSRecords, NumTSRecords); + if (AuxTarget) + AuxTarget->getTargetBuiltins(AuxTSRecords, NumAuxTSRecords); } bool Builtin::Context::builtinIsSupported(const Builtin::Info &BuiltinInfo, @@ -76,7 +84,12 @@ // Step #2: Register target-specific builtins. for (unsigned i = 0, e = NumTSRecords; i != e; ++i) if (builtinIsSupported(TSRecords[i], LangOpts)) - Table.get(TSRecords[i].Name).setBuiltinID(i+Builtin::FirstTSBuiltin); + Table.get(TSRecords[i].Name).setBuiltinID(i + Builtin::FirstTSBuiltin); + + // Step #3: Register target-specific builtins for AuxTarget. + for (unsigned i = 0, e = NumAuxTSRecords; i != e; ++i) + Table.get(AuxTSRecords[i].Name) + .setBuiltinID(i + Builtin::FirstTSBuiltin + NumTSRecords); } void Builtin::Context::forgetBuiltin(unsigned ID, IdentifierTable &Table) { Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -1865,40 +1865,54 @@ return GetUndefRValue(E->getType()); } -Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { - switch (getTarget().getTriple().getArch()) { +static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, + unsigned BuiltinID, const CallExpr *E, + llvm::Triple::ArchType Arch) { + switch (Arch) { case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: - return EmitARMBuiltinExpr(BuiltinID, E); + return CGF->EmitARMBuiltinExpr(BuiltinID, E); case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: - return EmitAArch64BuiltinExpr(BuiltinID, E); + return CGF->EmitAArch64BuiltinExpr(BuiltinID, E); case llvm::Triple::x86: case llvm::Triple::x86_64: - return EmitX86BuiltinExpr(BuiltinID, E); + return CGF->EmitX86BuiltinExpr(BuiltinID, E); case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: - return EmitPPCBuiltinExpr(BuiltinID, E); + return CGF->EmitPPCBuiltinExpr(BuiltinID, E); case llvm::Triple::r600: case llvm::Triple::amdgcn: - return EmitAMDGPUBuiltinExpr(BuiltinID, E); + return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); case llvm::Triple::systemz: - return EmitSystemZBuiltinExpr(BuiltinID, E); + return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); case llvm::Triple::nvptx: case llvm::Triple::nvptx64: - return EmitNVPTXBuiltinExpr(BuiltinID, E); + return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); case llvm::Triple::wasm32: case llvm::Triple::wasm64: - return EmitWebAssemblyBuiltinExpr(BuiltinID, E); + return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); default: return nullptr; } } +Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { + assert(getContext().getAuxTargetInfo() && "Missing aux target info"); + return EmitTargetArchBuiltinExpr( + this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, + getContext().getAuxTargetInfo()->getTriple().getArch()); + } + + return EmitTargetArchBuiltinExpr(this, BuiltinID, E, + getTarget().getTriple().getArch()); +} + static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool V1Ty=false) { Index: lib/Frontend/CompilerInstance.cpp =================================================================== --- lib/Frontend/CompilerInstance.cpp +++ lib/Frontend/CompilerInstance.cpp @@ -78,9 +78,8 @@ Diagnostics = Value; } -void CompilerInstance::setTarget(TargetInfo *Value) { - Target = Value; -} +void CompilerInstance::setTarget(TargetInfo *Value) { Target = Value; } +void CompilerInstance::setAuxTarget(TargetInfo *Value) { AuxTarget = Value; } void CompilerInstance::setFileManager(FileManager *Value) { FileMgr = Value; @@ -312,7 +311,7 @@ PP = new Preprocessor(&getPreprocessorOpts(), getDiagnostics(), getLangOpts(), getSourceManager(), *HeaderInfo, *this, PTHMgr, /*OwnsHeaderSearch=*/true, TUKind); - PP->Initialize(getTarget()); + PP->Initialize(getTarget(), getAuxTarget()); // Note that this is different then passing PTHMgr to Preprocessor's ctor. // That argument is used as the IdentifierInfoLookup argument to @@ -396,7 +395,7 @@ auto *Context = new ASTContext(getLangOpts(), PP.getSourceManager(), PP.getIdentifierTable(), PP.getSelectorTable(), PP.getBuiltinInfo()); - Context->InitBuiltinTypes(getTarget()); + Context->InitBuiltinTypes(getTarget(), getAuxTarget()); setASTContext(Context); } @@ -800,6 +799,13 @@ if (!hasTarget()) return false; + // Create TargetInfo for the other side of CUDA compilation. + if (getLangOpts().CUDA && !getFrontendOpts().AuxTriple.empty()) { + std::shared_ptr TO(new TargetOptions); + TO->Triple = getFrontendOpts().AuxTriple; + setAuxTarget(TargetInfo::CreateTargetInfo(getDiagnostics(), TO)); + } + // Inform the target of the language options. // // FIXME: We shouldn't need to do this, the target should be immutable once Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -975,6 +975,9 @@ Opts.OverrideRecordLayoutsFile = Args.getLastArgValue(OPT_foverride_record_layout_EQ); + Opts.AuxTriple = + llvm::Triple::normalize(Args.getLastArgValue(OPT_aux_triple)); + if (const Arg *A = Args.getLastArg(OPT_arcmt_check, OPT_arcmt_modify, OPT_arcmt_migrate)) { Index: lib/Frontend/InitPreprocessor.cpp =================================================================== --- lib/Frontend/InitPreprocessor.cpp +++ lib/Frontend/InitPreprocessor.cpp @@ -918,6 +918,10 @@ // Install things like __POWERPC__, __GNUC__, etc into the macro table. if (InitOpts.UsePredefines) { + if (LangOpts.CUDA && PP.getAuxTargetInfo()) + InitializePredefinedMacros(*PP.getAuxTargetInfo(), LangOpts, FEOpts, + Builder); + InitializePredefinedMacros(PP.getTargetInfo(), LangOpts, FEOpts, Builder); // Install definitions to make Objective-C++ ARC work well with various Index: lib/Lex/Preprocessor.cpp =================================================================== --- lib/Lex/Preprocessor.cpp +++ lib/Lex/Preprocessor.cpp @@ -62,20 +62,19 @@ IdentifierInfoLookup *IILookup, bool OwnsHeaders, TranslationUnitKind TUKind) : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr), - FileMgr(Headers.getFileMgr()), SourceMgr(SM), - ScratchBuf(new ScratchBuffer(SourceMgr)),HeaderInfo(Headers), + AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM), + ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers), TheModuleLoader(TheModuleLoader), ExternalSource(nullptr), Identifiers(opts, IILookup), PragmaHandlers(new PragmaNamespace(StringRef())), - IncrementalProcessing(false), TUKind(TUKind), - CodeComplete(nullptr), CodeCompletionFile(nullptr), - CodeCompletionOffset(0), LastTokenWasAt(false), - ModuleImportExpectsIdentifier(false), CodeCompletionReached(0), - MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr), - CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), - Callbacks(nullptr), CurSubmoduleState(&NullSubmoduleState), - MacroArgCache(nullptr), Record(nullptr), - MIChainHead(nullptr), DeserialMIChainHead(nullptr) { + IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr), + CodeCompletionFile(nullptr), CodeCompletionOffset(0), + LastTokenWasAt(false), ModuleImportExpectsIdentifier(false), + CodeCompletionReached(0), MainFileDir(nullptr), + SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr), + CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr), + CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr), + Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) { OwnsHeaderSearch = OwnsHeaders; CounterValue = 0; // __COUNTER__ starts at 0. @@ -170,13 +169,18 @@ delete &HeaderInfo; } -void Preprocessor::Initialize(const TargetInfo &Target) { +void Preprocessor::Initialize(const TargetInfo &Target, + const TargetInfo *AuxTarget) { assert((!this->Target || this->Target == &Target) && "Invalid override of target information"); this->Target = &Target; - + + assert((!this->AuxTarget || this->AuxTarget == AuxTarget) && + "Invalid override of aux target information."); + this->AuxTarget = AuxTarget; + // Initialize information about built-ins. - BuiltinInfo.initializeTarget(Target); + BuiltinInfo.InitializeTarget(Target, AuxTarget); HeaderInfo.setTarget(Target); } Index: lib/Sema/SemaDecl.cpp =================================================================== --- lib/Sema/SemaDecl.cpp +++ lib/Sema/SemaDecl.cpp @@ -11293,11 +11293,11 @@ if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads && Context.BuiltinInfo.isTSBuiltin(BuiltinID) && !FD->hasAttr() && !FD->hasAttr()) { - // Target-specific builtins are assumed to be intended for use - // in this particular CUDA compilation mode and should have - // appropriate attribute set so we can enforce CUDA function - // call restrictions. - if (getLangOpts().CUDAIsDevice) + // Assign appropriate attribute depending on CUDA compilation + // mode and the target builtin belongs to. E.g. during host + // compilation, aux builtins are __device__, the rest are __host__. + if (getLangOpts().CUDAIsDevice != + Context.BuiltinInfo.isAuxBuiltinID(BuiltinID)) FD->addAttr(CUDADeviceAttr::CreateImplicit(Context, FD->getLocation())); else FD->addAttr(CUDAHostAttr::CreateImplicit(Context, FD->getLocation())); Index: test/SemaCUDA/builtins.cu =================================================================== --- test/SemaCUDA/builtins.cu +++ test/SemaCUDA/builtins.cu @@ -1,36 +1,31 @@ -// Tests that target-specific builtins have appropriate host/device -// attributes and that CUDA call restrictions are enforced. Also -// verify that non-target builtins can be used from both host and -// device functions. +// Tests that host and target builtins can be used in the same TU, +// have appropriate host/device attributes and that CUDA call +// restrictions are enforced. Also verify that non-target builtins can +// be used from both host and device functions. // // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target // RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -aux-triple nvptx64-unknown-cuda \ // RUN: -fcuda-target-overloads -fsyntax-only -verify %s // RUN: %clang_cc1 -triple nvptx64-unknown-cuda -fcuda-is-device \ +// RUN: -aux-triple x86_64-unknown-unknown \ // RUN: -fcuda-target-overloads -fsyntax-only -verify %s +#if !(defined(__amd64__) && defined(__PTX__)) +#error "Expected to see preprocessor macros from both sides of compilation." +#endif -#ifdef __CUDA_ARCH__ -// Device-side builtins are not allowed to be called from host functions. void hf() { - int x = __builtin_ptx_read_tid_x(); // expected-note {{'__builtin_ptx_read_tid_x' declared here}} + int x = __builtin_ia32_rdtsc(); + int y = __builtin_ptx_read_tid_x(); // expected-note {{'__builtin_ptx_read_tid_x' declared here}} // expected-error@-1 {{reference to __device__ function '__builtin_ptx_read_tid_x' in __host__ function}} x = __builtin_abs(1); } + __attribute__((device)) void df() { int x = __builtin_ptx_read_tid_x(); + int y = __builtin_ia32_rdtsc(); // expected-error {{reference to __host__ function '__builtin_ia32_rdtsc' in __device__ function}} + // expected-note@20 {{'__builtin_ia32_rdtsc' declared here}} x = __builtin_abs(1); } -#else -// Host-side builtins are not allowed to be called from device functions. -__attribute__((device)) void df() { - int x = __builtin_ia32_rdtsc(); // expected-note {{'__builtin_ia32_rdtsc' declared here}} - // expected-error@-1 {{reference to __host__ function '__builtin_ia32_rdtsc' in __device__ function}} - x = __builtin_abs(1); -} -void hf() { - int x = __builtin_ia32_rdtsc(); - x = __builtin_abs(1); -} -#endif