Index: include/clang/Driver/Action.h =================================================================== --- include/clang/Driver/Action.h +++ include/clang/Driver/Action.h @@ -68,6 +68,17 @@ JobClassLast=VerifyPCHJobClass }; + // The offloading kind determines if this action is bound to a particular + // programming model. Each entry reserves one bit. + // + // FIXME: This is currently used to indicate that toolchains are used in a + // given programming model as well, but will be used here as well once a generic + // offloading action is implemented. + enum OffloadKind { + OFFLOAD_None = 0x00, + OFFLOAD_CUDA = 0x01, + }; + static const char *getClassName(ActionClass AC); private: Index: include/clang/Driver/Compilation.h =================================================================== --- include/clang/Driver/Compilation.h +++ include/clang/Driver/Compilation.h @@ -38,8 +38,17 @@ /// The default tool chain. const ToolChain &DefaultToolChain; - const ToolChain *CudaHostToolChain; - const ToolChain *CudaDeviceToolChain; + /// The tool chain of the offload host. + const ToolChain *OffloadHostToolChain; + + /// The host offload kinds, it will be a mask of all the programming models + /// the host has to support. + unsigned OffloadHostKinds; + + /// Array with the toolchains of offloading devices in the order they were + /// requested by the user. + typedef std::pair OffloadToolChainTy; + SmallVector OrderedOffloadingToolchains; /// The original (untranslated) input argument list. 
llvm::opt::InputArgList *Args; @@ -89,16 +98,73 @@ const Driver &getDriver() const { return TheDriver; } const ToolChain &getDefaultToolChain() const { return DefaultToolChain; } - const ToolChain *getCudaHostToolChain() const { return CudaHostToolChain; } - const ToolChain *getCudaDeviceToolChain() const { - return CudaDeviceToolChain; + const ToolChain *getOffloadingHostToolChain() const { + return OffloadHostToolChain; + } + unsigned isOffloadingHostKind(Action::OffloadKind Kind) const { + return OffloadHostKinds & Kind; } - void setCudaHostToolChain(const ToolChain *HostToolChain) { - CudaHostToolChain = HostToolChain; + /// Iterator that visits device toolchains of a given kind. + template + class specific_offload_kind_iterator + : public llvm::iterator_adaptor_base< + specific_offload_kind_iterator, + ArrayRef::const_iterator, + std::forward_iterator_tag, OffloadToolChainTy, ptrdiff_t, + OffloadToolChainTy, OffloadToolChainTy> { + ArrayRef::const_iterator End; + + void SkipKinds() { + while (this->I != End && this->I->second != Kind) + ++this->I; + } + + public: + explicit specific_offload_kind_iterator(ArrayRef TCs) + : specific_offload_kind_iterator::iterator_adaptor_base(TCs.begin()), + End(TCs.end()) { + SkipKinds(); + } + + const ToolChain *operator*() const { return this->I->first; } + const ToolChain *operator->() const { return **this; } + + specific_offload_kind_iterator &operator++() { + ++this->I; + SkipKinds(); + return *this; + } + }; + + template + llvm::iterator_range> + getOffloadDeviceToolChains() const { + return {specific_offload_kind_iterator(OrderedOffloadingToolchains), + specific_offload_kind_iterator( + llvm::makeArrayRef(OrderedOffloadingToolchains.end(), 0))}; } - void setCudaDeviceToolChain(const ToolChain *DeviceToolChain) { - CudaDeviceToolChain = DeviceToolChain; + + // Return an offload device toolchain of the provided kind. Only one is + // expected to exist. If we can't match any toolchain, return nullptr. 
+ template + const ToolChain *getSingleOffloadDeviceToolChain() const { + auto TCs = getOffloadDeviceToolChains(); + + if (TCs.begin() != TCs.end()) { + assert(std::next(TCs.begin()) == TCs.end() && + "More than one tool chain of the this kind exist."); + return *TCs.begin(); + } + return nullptr; + } + + void addOffloadDeviceToolChain(const ToolChain *DeviceToolChain, + Action::OffloadKind OffloadKind) { + // Update the host offload kind to also contain this kind. + OffloadHostKinds |= OffloadKind; + OrderedOffloadingToolchains.push_back( + std::make_pair(DeviceToolChain, OffloadKind)); } const llvm::opt::InputArgList &getInputArgs() const { return *Args; } Index: include/clang/Driver/Driver.h =================================================================== --- include/clang/Driver/Driver.h +++ include/clang/Driver/Driver.h @@ -275,6 +275,11 @@ /// @name Primary Functionality /// @{ + /// CreateOffloadingDeviceToolChains - create all the toolchains required to + /// support offloading devices given the programming models specified in the + /// current compilation. Also, update the host tool chain kind accordingly. + void CreateOffloadingDeviceToolChains(Compilation &C, InputList &Inputs); + /// BuildCompilation - Construct a compilation object for a command /// line argument vector. 
/// Index: lib/Driver/Compilation.cpp =================================================================== --- lib/Driver/Compilation.cpp +++ lib/Driver/Compilation.cpp @@ -25,7 +25,7 @@ Compilation::Compilation(const Driver &D, const ToolChain &_DefaultToolChain, InputArgList *_Args, DerivedArgList *_TranslatedArgs) : TheDriver(D), DefaultToolChain(_DefaultToolChain), - CudaHostToolChain(&DefaultToolChain), CudaDeviceToolChain(nullptr), + OffloadHostToolChain(&DefaultToolChain), OffloadHostKinds(0u), Args(_Args), TranslatedArgs(_TranslatedArgs), Redirects(nullptr), ForDiagnostics(false) {} Index: lib/Driver/Driver.cpp =================================================================== --- lib/Driver/Driver.cpp +++ lib/Driver/Driver.cpp @@ -396,6 +396,32 @@ } } +void Driver::CreateOffloadingDeviceToolChains(Compilation &C, + InputList &Inputs) { + + // + // CUDA + // + // We need to generate a CUDA toolchain if any of the inputs has a CUDA type. + for (auto &I : Inputs) + // Have we found a CUDA file? If so, generate the toolchain. + if (types::isCuda(I.first)) { + const ToolChain &TC = getToolChain( + C.getInputArgs(), + llvm::Triple(C.getOffloadingHostToolChain()->getTriple().isArch64Bit() + ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda")); + C.addOffloadDeviceToolChain(&TC, Action::OFFLOAD_CUDA); + break; + } + + // + // Add support for other offloading programming models here. + // + + return; +} + Compilation *Driver::BuildCompilation(ArrayRef ArgList) { llvm::PrettyStackTraceString CrashInfo("Compilation construction"); @@ -507,10 +533,6 @@ // The compilation takes ownership of Args. Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs); - C->setCudaDeviceToolChain( - &getToolChain(C->getArgs(), llvm::Triple(TC.getTriple().isArch64Bit() - ? 
"nvptx64-nvidia-cuda" - : "nvptx-nvidia-cuda"))); if (!HandleImmediateArgs(*C)) return C; @@ -518,6 +540,9 @@ InputList Inputs; BuildInputs(C->getDefaultToolChain(), *TranslatedArgs, Inputs); + // Get the toolchains for the offloading devices, if any. + CreateOffloadingDeviceToolChains(*C, Inputs); + // Construct the list of abstract actions to perform for this compilation. On // MachO targets this uses the driver-driver and universal actions. if (TC.getTriple().isOSBinFormatMachO()) @@ -1331,7 +1356,7 @@ CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg)); // Build actions for all device inputs. - assert(C.getCudaDeviceToolChain() && + assert(C.getSingleOffloadDeviceToolChain() && "Missing toolchain for device-side compilation."); ActionList CudaDeviceActions; C.getDriver().BuildActions(C, Args, CudaDeviceInputs, CudaDeviceActions); @@ -1971,7 +1996,8 @@ // Initial processing of CudaDeviceAction carries host params. // Call BuildJobsForAction() again, now with correct device parameters. InputInfo II = BuildJobsForAction( - C, *CDA->input_begin(), C.getCudaDeviceToolChain(), + C, *CDA->input_begin(), + C.getSingleOffloadDeviceToolChain(), CDA->getGpuArchName(), CDA->isAtTopLevel(), /*MultipleArchs=*/true, LinkingOutput, CachedResults); // Currently II's Action is *CDA->input_begin(). Set it to CDA instead, so Index: lib/Driver/Tools.cpp =================================================================== --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -3583,10 +3583,11 @@ // particular compilation pass we're constructing here. For now we // can check which toolchain we're using and pick the other one to // extract the triple. 
- if (&getToolChain() == C.getCudaDeviceToolChain()) - AuxToolChain = C.getCudaHostToolChain(); - else if (&getToolChain() == C.getCudaHostToolChain()) - AuxToolChain = C.getCudaDeviceToolChain(); + if (&getToolChain() == + C.getSingleOffloadDeviceToolChain()) + AuxToolChain = C.getOffloadingHostToolChain(); + else if (&getToolChain() == C.getOffloadingHostToolChain()) + AuxToolChain = C.getSingleOffloadDeviceToolChain(); else llvm_unreachable("Can't figure out CUDA compilation mode."); assert(AuxToolChain != nullptr && "No aux toolchain.");