Index: lib/Driver/Driver.cpp
===================================================================
--- lib/Driver/Driver.cpp
+++ lib/Driver/Driver.cpp
@@ -1547,8 +1547,9 @@
     /// added to the provided host action \a HostAction. By default it is
     /// inactive.
     virtual ActionBuilderReturnCode
-    getDeviceDepences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase,
-                      phases::ID FinalPhase, PhasesTy &Phases) {
+    getDeviceDependences(OffloadAction::DeviceDependences &DA,
+                         phases::ID CurPhase, phases::ID FinalPhase,
+                         PhasesTy &Phases) {
       return ABRT_Inactive;
     }
 
@@ -1606,8 +1607,9 @@
         : DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {}
 
     ActionBuilderReturnCode
-    getDeviceDepences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase,
-                      phases::ID FinalPhase, PhasesTy &Phases) override {
+    getDeviceDependences(OffloadAction::DeviceDependences &DA,
+                         phases::ID CurPhase, phases::ID FinalPhase,
+                         PhasesTy &Phases) override {
       if (!IsActive)
         return ABRT_Inactive;
 
@@ -1826,7 +1828,127 @@
     }
   };
 
-  /// Add the implementation for other specialized builders here.
+  /// OpenMP action builder. The host bitcode is passed to the device frontend
+  /// and all the device linked images are passed to the host link phase.
+  class OpenMPActionBuilder final : public DeviceActionBuilder {
+    /// The OpenMP actions for the current input.
+    ActionList OpenMPDeviceActions;
+
+    /// The linker inputs obtained for each toolchain.
+    SmallVector<ActionList, 8> DeviceLinkerInputs;
+
+  public:
+    OpenMPActionBuilder(Compilation &C, DerivedArgList &Args,
+                        const Driver::InputList &Inputs)
+        : DeviceActionBuilder(C, Args, Inputs, Action::OFK_OpenMP) {}
+
+    /// Advance each pending device action to \p CurPhase. At the link phase the
+    /// actions are instead queued as linker inputs of their toolchain and the
+    /// pending list is cleared. Always returns ABRT_Success.
+    ActionBuilderReturnCode
+    getDeviceDependences(OffloadAction::DeviceDependences &DA,
+                         phases::ID CurPhase, phases::ID FinalPhase,
+                         PhasesTy &Phases) override {
+
+      // We should always have an action for each input.
+      assert(OpenMPDeviceActions.size() == ToolChains.size() &&
+             "Number of OpenMP actions and toolchains do not match.");
+
+      // The host only depends on device action in the linking phase, when all
+      // the device images have to be embedded in the host image.
+      if (CurPhase == phases::Link) {
+        assert(ToolChains.size() == DeviceLinkerInputs.size() &&
+               "Toolchains and linker inputs sizes do not match.");
+        auto LI = DeviceLinkerInputs.begin();
+        for (auto *A : OpenMPDeviceActions) {
+          LI->push_back(A);
+          ++LI;
+        }
+
+        // We passed the device action as a host dependence, so we don't need to
+        // do anything else with them.
+        OpenMPDeviceActions.clear();
+        return ABRT_Success;
+      }
+
+      // By default, we produce an action for each device arch.
+      for (Action *&A : OpenMPDeviceActions)
+        A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
+
+      return ABRT_Success;
+    }
+
+    /// Replicate a host input action for every OpenMP toolchain, or attach the
+    /// host compile action as a dependence of each pending device action.
+    ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
+
+      // If this is an input action replicate it for each OpenMP toolchain.
+      if (auto *IA = dyn_cast<InputAction>(HostAction)) {
+        OpenMPDeviceActions.clear();
+        for (unsigned I = 0; I < ToolChains.size(); ++I)
+          OpenMPDeviceActions.push_back(
+              C.MakeAction<InputAction>(IA->getInputArg(), IA->getType()));
+        return ABRT_Success;
+      }
+
+      // When generating code for OpenMP we use the host compile phase result as
+      // a dependence to the device compile phase so that it can learn what
+      // declarations should be emitted. However, this is not the only use for
+      // the host action, so we prevent it from being collapsed.
+      if (isa<CompileJobAction>(HostAction)) {
+        HostAction->setCannotBeCollapsedWithNextDependentAction();
+        assert(ToolChains.size() == OpenMPDeviceActions.size() &&
+               "Toolchains and device action sizes do not match.");
+        OffloadAction::HostDependence HDep(
+            *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
+            /*BoundArch=*/nullptr, Action::OFK_OpenMP);
+        auto TC = ToolChains.begin();
+        for (Action *&A : OpenMPDeviceActions) {
+          assert(isa<CompileJobAction>(A));
+          OffloadAction::DeviceDependences DDep;
+          DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP);
+          A = C.MakeAction<OffloadAction>(HDep, DDep);
+          ++TC;
+        }
+      }
+      return ABRT_Success;
+    }
+
+    /// Add to \p DA one device link action per toolchain, built from the linker
+    /// inputs gathered so far.
+    void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
+      assert(ToolChains.size() == DeviceLinkerInputs.size() &&
+             "Toolchains and linker inputs sizes do not match.");
+
+      // Append a new link action for each device.
+      auto TC = ToolChains.begin();
+      for (auto &LI : DeviceLinkerInputs) {
+        auto *DeviceLinkAction =
+            C.MakeAction<LinkJobAction>(LI, types::TY_Image);
+        DA.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
+               Action::OFK_OpenMP);
+        ++TC;
+      }
+    }
+
+    /// Collect the OpenMP offloading toolchains and allocate one linker input
+    /// list per toolchain. Always returns false.
+    bool initialize() override {
+      // Get the OpenMP toolchains. If we don't get any, the action builder will
+      // know there is nothing to do related to OpenMP offloading.
+      auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
+      for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE;
+           ++TI)
+        ToolChains.push_back(TI->second);
+
+      DeviceLinkerInputs.resize(ToolChains.size());
+      return false;
+    }
+  };
+
+  ///
+  /// TODO: Add the implementation for other specialized builders here.
+  ///
 
   /// Specialized builders being used by this offloading action builder.
   SmallVector<DeviceActionBuilder *, 4> SpecializedBuilders;
@@ -1842,6 +1964,9 @@
     // Create a specialized builder for CUDA.
     SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs));
 
+    // Create a specialized builder for OpenMP.
+    SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs));
+
     //
     // TODO: Build other specialized builders here.
     //
@@ -1884,7 +2009,8 @@
         continue;
       }
 
-      auto RetCode = SB->getDeviceDepences(DDeps, CurPhase, FinalPhase, Phases);
+      auto RetCode =
+          SB->getDeviceDependences(DDeps, CurPhase, FinalPhase, Phases);
 
       // If the builder explicitly says the host action should be ignored,
       // we need to increment the variable that tracks the builders that request
Index: test/Driver/openmp-offload.c
===================================================================
--- test/Driver/openmp-offload.c
+++ test/Driver/openmp-offload.c
@@ -2,6 +2,11 @@
 /// Perform several driver tests for OpenMP offloading
 ///
 
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: powerpc-registered-target
+// REQUIRES: nvptx-registered-target
+
 /// ###########################################################################
 
 /// Check whether an invalid OpenMP target is specified:
@@ -35,3 +40,136 @@
 // RUN:   %clang -### -ccc-print-phases -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-ibm-linux-gnu %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-DUPLICATES %s
 // CHK-DUPLICATES: warning: The OpenMP offloading target 'powerpc64le-ibm-linux-gnu' is similar to target 'powerpc64le-ibm-linux-gnu' already specified - will be ignored.
+
+/// ###########################################################################
+
+/// Check the phases graph when using a single target, different from the host.
+/// We should have an offload action joining the host compile and device
+/// preprocessor and another one joining the device linking outputs to the host
+/// action.
+// RUN:   %clang -ccc-print-phases -fopenmp -target powerpc64le-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-PHASES %s
+// CHK-PHASES: 0: input, "[[INPUT:.+\.c]]", c, (host-openmp)
+// CHK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp)
+// CHK-PHASES: 2: compiler, {1}, ir, (host-openmp)
+// CHK-PHASES: 3: backend, {2}, assembler, (host-openmp)
+// CHK-PHASES: 4: assembler, {3}, object, (host-openmp)
+// CHK-PHASES: 5: linker, {4}, image, (host-openmp)
+// CHK-PHASES: 6: input, "[[INPUT]]", c, (device-openmp)
+// CHK-PHASES: 7: preprocessor, {6}, cpp-output, (device-openmp)
+// CHK-PHASES: 8: compiler, {7}, ir, (device-openmp)
+// CHK-PHASES: 9: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {8}, ir
+// CHK-PHASES: 10: backend, {9}, assembler, (device-openmp)
+// CHK-PHASES: 11: assembler, {10}, object, (device-openmp)
+// CHK-PHASES: 12: linker, {11}, image, (device-openmp)
+// CHK-PHASES: 13: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {5}, "device-openmp (x86_64-pc-linux-gnu)" {12}, image
+
+/// ###########################################################################
+
+/// Check the phases when using multiple targets. Here we also add a library to
+/// make sure it is treated as input by the device.
+// RUN:   %clang -ccc-print-phases -lsomelib -fopenmp -target powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-PHASES-LIB %s
+// CHK-PHASES-LIB: 0: input, "somelib", object, (host-openmp)
+// CHK-PHASES-LIB: 1: input, "[[INPUT:.+\.c]]", c, (host-openmp)
+// CHK-PHASES-LIB: 2: preprocessor, {1}, cpp-output, (host-openmp)
+// CHK-PHASES-LIB: 3: compiler, {2}, ir, (host-openmp)
+// CHK-PHASES-LIB: 4: backend, {3}, assembler, (host-openmp)
+// CHK-PHASES-LIB: 5: assembler, {4}, object, (host-openmp)
+// CHK-PHASES-LIB: 6: linker, {0, 5}, image, (host-openmp)
+// CHK-PHASES-LIB: 7: input, "somelib", object, (device-openmp)
+// CHK-PHASES-LIB: 8: input, "[[INPUT]]", c, (device-openmp)
+// CHK-PHASES-LIB: 9: preprocessor, {8}, cpp-output, (device-openmp)
+// CHK-PHASES-LIB: 10: compiler, {9}, ir, (device-openmp)
+// CHK-PHASES-LIB: 11: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {10}, ir
+// CHK-PHASES-LIB: 12: backend, {11}, assembler, (device-openmp)
+// CHK-PHASES-LIB: 13: assembler, {12}, object, (device-openmp)
+// CHK-PHASES-LIB: 14: linker, {7, 13}, image, (device-openmp)
+// CHK-PHASES-LIB: 15: input, "somelib", object, (device-openmp)
+// CHK-PHASES-LIB: 16: input, "[[INPUT]]", c, (device-openmp)
+// CHK-PHASES-LIB: 17: preprocessor, {16}, cpp-output, (device-openmp)
+// CHK-PHASES-LIB: 18: compiler, {17}, ir, (device-openmp)
+// CHK-PHASES-LIB: 19: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (powerpc64-ibm-linux-gnu)" {18}, ir
+// CHK-PHASES-LIB: 20: backend, {19}, assembler, (device-openmp)
+// CHK-PHASES-LIB: 21: assembler, {20}, object, (device-openmp)
+// CHK-PHASES-LIB: 22: linker, {15, 21}, image, (device-openmp)
+// CHK-PHASES-LIB: 23: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {6}, "device-openmp (x86_64-pc-linux-gnu)" {14}, "device-openmp (powerpc64-ibm-linux-gnu)" {22}, image
+
+
+/// ###########################################################################
+
+/// Check the phases when using multiple targets and multiple source files
+// RUN:   echo " " > %t.c
+// RUN:   %clang -ccc-print-phases -lsomelib -fopenmp -target powerpc64-ibm-linux-gnu -fopenmp-targets=x86_64-pc-linux-gnu,powerpc64-ibm-linux-gnu %s %t.c 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-PHASES-FILES %s
+// CHK-PHASES-FILES: 0: input, "somelib", object, (host-openmp)
+// CHK-PHASES-FILES: 1: input, "[[INPUT1:.+\.c]]", c, (host-openmp)
+// CHK-PHASES-FILES: 2: preprocessor, {1}, cpp-output, (host-openmp)
+// CHK-PHASES-FILES: 3: compiler, {2}, ir, (host-openmp)
+// CHK-PHASES-FILES: 4: backend, {3}, assembler, (host-openmp)
+// CHK-PHASES-FILES: 5: assembler, {4}, object, (host-openmp)
+// CHK-PHASES-FILES: 6: input, "[[INPUT2:.+\.c]]", c, (host-openmp)
+// CHK-PHASES-FILES: 7: preprocessor, {6}, cpp-output, (host-openmp)
+// CHK-PHASES-FILES: 8: compiler, {7}, ir, (host-openmp)
+// CHK-PHASES-FILES: 9: backend, {8}, assembler, (host-openmp)
+// CHK-PHASES-FILES: 10: assembler, {9}, object, (host-openmp)
+// CHK-PHASES-FILES: 11: linker, {0, 5, 10}, image, (host-openmp)
+// CHK-PHASES-FILES: 12: input, "somelib", object, (device-openmp)
+// CHK-PHASES-FILES: 13: input, "[[INPUT1]]", c, (device-openmp)
+// CHK-PHASES-FILES: 14: preprocessor, {13}, cpp-output, (device-openmp)
+// CHK-PHASES-FILES: 15: compiler, {14}, ir, (device-openmp)
+// CHK-PHASES-FILES: 16: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {15}, ir
+// CHK-PHASES-FILES: 17: backend, {16}, assembler, (device-openmp)
+// CHK-PHASES-FILES: 18: assembler, {17}, object, (device-openmp)
+// CHK-PHASES-FILES: 19: input, "[[INPUT2]]", c, (device-openmp)
+// CHK-PHASES-FILES: 20: preprocessor, {19}, cpp-output, (device-openmp)
+// CHK-PHASES-FILES: 21: compiler, {20}, ir, (device-openmp)
+// CHK-PHASES-FILES: 22: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (x86_64-pc-linux-gnu)" {21}, ir
+// CHK-PHASES-FILES: 23: backend, {22}, assembler, (device-openmp)
+// CHK-PHASES-FILES: 24: assembler, {23}, object, (device-openmp)
+// CHK-PHASES-FILES: 25: linker, {12, 18, 24}, image, (device-openmp)
+// CHK-PHASES-FILES: 26: input, "somelib", object, (device-openmp)
+// CHK-PHASES-FILES: 27: input, "[[INPUT1]]", c, (device-openmp)
+// CHK-PHASES-FILES: 28: preprocessor, {27}, cpp-output, (device-openmp)
+// CHK-PHASES-FILES: 29: compiler, {28}, ir, (device-openmp)
+// CHK-PHASES-FILES: 30: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (powerpc64-ibm-linux-gnu)" {29}, ir
+// CHK-PHASES-FILES: 31: backend, {30}, assembler, (device-openmp)
+// CHK-PHASES-FILES: 32: assembler, {31}, object, (device-openmp)
+// CHK-PHASES-FILES: 33: input, "[[INPUT2]]", c, (device-openmp)
+// CHK-PHASES-FILES: 34: preprocessor, {33}, cpp-output, (device-openmp)
+// CHK-PHASES-FILES: 35: compiler, {34}, ir, (device-openmp)
+// CHK-PHASES-FILES: 36: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (powerpc64-ibm-linux-gnu)" {35}, ir
+// CHK-PHASES-FILES: 37: backend, {36}, assembler, (device-openmp)
+// CHK-PHASES-FILES: 38: assembler, {37}, object, (device-openmp)
+// CHK-PHASES-FILES: 39: linker, {26, 32, 38}, image, (device-openmp)
+// CHK-PHASES-FILES: 40: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {11}, "device-openmp (x86_64-pc-linux-gnu)" {25}, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, image
+
+/// ###########################################################################
+
+/// Check the phases graph when using a single GPU target, and check the OpenMP
+/// and CUDA phases are articulated correctly.
+// RUN:   %clang -ccc-print-phases -fopenmp -target powerpc64le-ibm-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -x cuda %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-PHASES-WITH-CUDA %s
+// CHK-PHASES-WITH-CUDA: 0: input, "[[INPUT:.+\.c]]", cuda, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 2: compiler, {1}, ir, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_20)
+// CHK-PHASES-WITH-CUDA: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_20)
+// CHK-PHASES-WITH-CUDA: 5: compiler, {4}, ir, (device-cuda, sm_20)
+// CHK-PHASES-WITH-CUDA: 6: backend, {5}, assembler, (device-cuda, sm_20)
+// CHK-PHASES-WITH-CUDA: 7: assembler, {6}, object, (device-cuda, sm_20)
+// CHK-PHASES-WITH-CUDA: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {7}, object
+// CHK-PHASES-WITH-CUDA: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_20)" {6}, assembler
+// CHK-PHASES-WITH-CUDA: 10: linker, {8, 9}, cuda-fatbin, (device-cuda)
+// CHK-PHASES-WITH-CUDA: 11: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {10}, ir
+// CHK-PHASES-WITH-CUDA: 12: backend, {11}, assembler, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 13: assembler, {12}, object, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 14: linker, {13}, image, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 15: input, "[[INPUT]]", cuda, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 16: preprocessor, {15}, cuda-cpp-output, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 17: compiler, {16}, ir, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 18: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {17}, ir
+// CHK-PHASES-WITH-CUDA: 19: backend, {18}, assembler, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 20: assembler, {19}, object, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 21: linker, {20}, image, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 22: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {14}, "device-openmp (nvptx64-nvidia-cuda)" {21}, image