Index: include/clang/Driver/Compilation.h =================================================================== --- include/clang/Driver/Compilation.h +++ include/clang/Driver/Compilation.h @@ -68,10 +68,27 @@ /// The root list of jobs. JobList Jobs; - /// Cache of translated arguments for a particular tool chain and bound - /// architecture. - llvm::DenseMap, - llvm::opt::DerivedArgList *> TCArgs; + /// Cache of translated arguments for a particular tool chain, bound + /// architecture, and device offload kind. + struct TCArgsKey final { + const ToolChain *TC = nullptr; + const char *BoundArch = nullptr; + Action::OffloadKind DeviceOffloadKind = Action::OFK_None; + bool operator<(const TCArgsKey &K) const { + if (TC < K.TC) + return true; + else if (TC == K.TC && BoundArch < K.BoundArch) + return true; + else if (TC == K.TC && BoundArch == K.BoundArch && + DeviceOffloadKind < K.DeviceOffloadKind) + return true; + return false; + } + TCArgsKey(const ToolChain *TC, const char *BoundArch, + Action::OffloadKind DeviceOffloadKind) + : TC(TC), BoundArch(BoundArch), DeviceOffloadKind(DeviceOffloadKind) {} + }; + std::map TCArgs; /// Temporary files which should be removed on exit. llvm::opt::ArgStringList TempFiles; @@ -182,10 +199,15 @@ /// getArgsForToolChain - Return the derived argument list for the /// tool chain \p TC (or the default tool chain, if TC is not specified). + /// If a device offloading kind is specified, a translation specific for that + /// kind is performed, if any. /// /// \param BoundArch - The bound architecture name, or 0. - const llvm::opt::DerivedArgList &getArgsForToolChain(const ToolChain *TC, - const char *BoundArch); + /// \param DeviceOffloadKind - The offload device kind that should be used in + /// the translation, if any. + const llvm::opt::DerivedArgList & + getArgsForToolChain(const ToolChain *TC, const char *BoundArch, + Action::OffloadKind DeviceOffloadKind); /// addTempFile - Add a file to remove on exit, and returns its /// argument. Index: include/clang/Driver/ToolChain.h =================================================================== --- include/clang/Driver/ToolChain.h +++ include/clang/Driver/ToolChain.h @@ -176,12 +176,15 @@ /// TranslateArgs - Create a new derived argument list for any argument /// translations this ToolChain may wish to perform, or 0 if no tool chain - /// specific translations are needed. + /// specific translations are needed. If \p DeviceOffloadKind is specified + /// the translation specific for that offload kind is performed. /// /// \param BoundArch - The bound architecture name, or 0. + /// \param DeviceOffloadKind - The device offload kind used for the + /// translation. virtual llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, - const char *BoundArch) const { + TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch, + Action::OffloadKind DeviceOffloadKind) const { return nullptr; } Index: lib/Driver/Compilation.cpp =================================================================== --- lib/Driver/Compilation.cpp +++ lib/Driver/Compilation.cpp @@ -37,11 +37,9 @@ delete Args; // Free any derived arg lists. - for (llvm::DenseMap, - DerivedArgList*>::iterator it = TCArgs.begin(), - ie = TCArgs.end(); it != ie; ++it) - if (it->second != TranslatedArgs) - delete it->second; + for (auto &Arg : TCArgs) + if (Arg.second != TranslatedArgs) + delete Arg.second; // Free redirections of stdout/stderr. if (Redirects) { @@ -52,14 +50,15 @@ } } -const DerivedArgList &Compilation::getArgsForToolChain(const ToolChain *TC, - const char *BoundArch) { +const DerivedArgList & +Compilation::getArgsForToolChain(const ToolChain *TC, const char *BoundArch, + Action::OffloadKind DeviceOffloadKind) { if (!TC) TC = &DefaultToolChain; - DerivedArgList *&Entry = TCArgs[std::make_pair(TC, BoundArch)]; + DerivedArgList *&Entry = TCArgs[{TC, BoundArch, DeviceOffloadKind}]; if (!Entry) { - Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch); + Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch, DeviceOffloadKind); if (!Entry) Entry = TranslatedArgs; } Index: lib/Driver/Driver.cpp =================================================================== --- lib/Driver/Driver.cpp +++ lib/Driver/Driver.cpp @@ -3001,8 +3001,10 @@ } llvm::errs() << "], output: " << Result.getAsString() << "\n"; } else { - T->ConstructJob(C, *JA, Result, InputInfos, - C.getArgsForToolChain(TC, BoundArch), LinkingOutput); + T->ConstructJob( + C, *JA, Result, InputInfos, + C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), + LinkingOutput); } return Result; } Index: lib/Driver/MSVCToolChain.cpp =================================================================== --- lib/Driver/MSVCToolChain.cpp +++ lib/Driver/MSVCToolChain.cpp @@ -806,7 +806,7 @@ llvm::opt::DerivedArgList * MSVCToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, - const char *BoundArch) const { + const char *BoundArch, Action::OffloadKind) const { DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); const OptTable &Opts = getDriver().getOpts(); Index: lib/Driver/ToolChains.h =================================================================== --- lib/Driver/ToolChains.h +++ lib/Driver/ToolChains.h @@ -208,6 +208,9 @@ bool isPIEDefault() const override; bool isPICDefaultForced() const override; bool IsIntegratedAssemblerDefault() const override; + llvm::opt::DerivedArgList * + TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch, + Action::OffloadKind DeviceOffloadKind) const override; protected: Tool *getTool(Action::ActionClass AC) const override; @@ -306,8 +309,8 @@ bool HasNativeLLVMSupport() const override; llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, - const char *BoundArch) const override; + TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch, + Action::OffloadKind DeviceOffloadKind) const override; bool IsBlocksDefault() const override { // Always allow blocks on Apple; users interested in versioning are @@ -511,8 +514,8 @@ bool isCrossCompiling() const override { return false; } llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, - const char *BoundArch) const override; + TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch, + Action::OffloadKind DeviceOffloadKind) const override; CXXStdlibType GetDefaultCXXStdlibType() const override; ObjCRuntime getDefaultObjCRuntime(bool isNonFragile) const override; @@ -843,8 +846,8 @@ const llvm::opt::ArgList &Args); llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, - const char *BoundArch) const override; + TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch, + Action::OffloadKind DeviceOffloadKind) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; @@ -1011,8 +1014,8 @@ const llvm::opt::ArgList &Args); llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, - const char *BoundArch) const override; + TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *BoundArch, + Action::OffloadKind DeviceOffloadKind) const override; bool IsIntegratedAssemblerDefault() const override; bool IsUnwindTablesDefault() const override; Index: lib/Driver/ToolChains.cpp =================================================================== --- lib/Driver/ToolChains.cpp +++ lib/Driver/ToolChains.cpp @@ -809,7 +809,8 @@ } DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args, - const char *BoundArch) const { + const char *BoundArch, + Action::OffloadKind) const { DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); const OptTable &Opts = getDriver().getOpts(); @@ -1038,10 +1039,12 @@ AddLinkRuntimeLib(Args, CmdArgs, CompilerRT, false, true); } -DerivedArgList *Darwin::TranslateArgs(const DerivedArgList &Args, - const char *BoundArch) const { +DerivedArgList * +Darwin::TranslateArgs(const DerivedArgList &Args, const char *BoundArch, + Action::OffloadKind DeviceOffloadKind) const { // First get the generic Apple args, before moving onto Darwin-specific ones. - DerivedArgList *DAL = MachO::TranslateArgs(Args, BoundArch); + DerivedArgList *DAL = + MachO::TranslateArgs(Args, BoundArch, DeviceOffloadKind); const OptTable &Opts = getDriver().getOpts(); // If no architecture is bound, none of the translations here are relevant. @@ -2693,6 +2696,48 @@ return true; } +llvm::opt::DerivedArgList * +Generic_GCC::TranslateArgs(const llvm::opt::DerivedArgList &Args, const char *, + Action::OffloadKind DeviceOffloadKind) const { + + // If this tool chain is used for an OpenMP offloading device we have to make + // sure we always generate a shared library regardless the commands the user + // passed to the host. This is required because the runtime library requires + // to load the device image dynamically at run time. + if (DeviceOffloadKind == Action::OFK_OpenMP) { + DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); + const OptTable &Opts = getDriver().getOpts(); + + // Request the shared library. Given that these option are decided + // implicitelly, they do not refer to any base argument. + DAL->AddFlagArg(/*BaseArg=*/nullptr, Opts.getOption(options::OPT_shared)); + DAL->AddFlagArg(/*BaseArg=*/nullptr, Opts.getOption(options::OPT_fPIC)); + + // Filter all the arguments we don't care passing to the offloading + // toolchain as they can mess up with the creation of a shared library. + for (auto *A : Args) { + switch ((options::ID)A->getOption().getID()) { + default: + DAL->append(A); + break; + case options::OPT_shared: + case options::OPT_static: + case options::OPT_fPIC: + case options::OPT_fno_PIC: + case options::OPT_fpic: + case options::OPT_fno_pic: + case options::OPT_fPIE: + case options::OPT_fno_PIE: + case options::OPT_fpie: + case options::OPT_fno_pie: + break; + } + } + return DAL; + } + return nullptr; +} + void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs, ArgStringList &CC1Args) const { const Generic_GCC::GCCVersion &V = GCCInstallation.getVersion(); @@ -4668,7 +4713,8 @@ llvm::opt::DerivedArgList * CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, - const char *BoundArch) const { + const char *BoundArch, + Action::OffloadKind) const { DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); const OptTable &Opts = getDriver().getOpts(); Index: test/Driver/openmp-offload.c =================================================================== --- test/Driver/openmp-offload.c +++ test/Driver/openmp-offload.c @@ -169,25 +169,25 @@ // // Compile for the powerpc device. -/ -// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS: ld" {{.*}}"-o" "[[T1BIN:.+\.out]]" {{.*}}"[[T1OBJ]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1PP:.+\.i]]" "-x" "c" "[[INPUT]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1BC:.+\.bc]]" "-x" "cpp-output" "[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T1ASM:.+\.s]]" "-x" "ir" "[[T1BC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T1OBJ:.+\.o]]" "[[T1ASM]]" -// CHK-COMMANDS-ST: ld" {{.*}}"-o" "[[T1BIN:.+\.out-device-openmp-powerpc64le-ibm-linux-gnu]]" {{.*}}[[T1OBJ]] +// CHK-COMMANDS-ST: ld" {{.*}}"-shared" "-o" "[[T1BIN:.+\.out-device-openmp-powerpc64le-ibm-linux-gnu]]" {{.*}}[[T1OBJ]] // // Compile for the x86 device. // -// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "-x" "c" "[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS: ld" {{.*}}"-o" "[[T2BIN:.+\.out]]" {{.*}}"[[T2OBJ]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-E" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2PP:.+\.i]]" "-x" "c" "[[INPUT]]" -// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC:.+\.bc]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2BC:.+\.bc]]" "-x" "cpp-output" "[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "[[HOSTBC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "[[T2ASM:.+\.s]]" "-x" "ir" "[[T2BC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "[[T2OBJ:.+\.o]]" "[[T2ASM]]" -// CHK-COMMANDS-ST: ld" {{.*}}"-o" "[[T2BIN:.+\.out-device-openmp-x86_64-pc-linux-gnu]]" {{.*}}[[T2OBJ]] +// CHK-COMMANDS-ST: ld" {{.*}}"-shared" "-o" "[[T2BIN:.+\.out-device-openmp-x86_64-pc-linux-gnu]]" {{.*}}[[T2OBJ]] // // Generate host object from the BC file and link using the linker script. @@ -228,5 +228,3 @@ // CHK-LKS: } // CHK-LKS: } // CHK-LKS: INSERT BEFORE .data - -