diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp --- a/clang/lib/Driver/Action.cpp +++ b/clang/lib/Driver/Action.cpp @@ -221,13 +221,17 @@ // Add device inputs and propagate info to the device actions. Do work only if // we have dependencies. - for (unsigned i = 0, e = DDeps.getActions().size(); i != e; ++i) + for (unsigned i = 0, e = DDeps.getActions().size(); i != e; ++i) { if (auto *A = DDeps.getActions()[i]) { getInputs().push_back(A); A->propagateDeviceOffloadInfo(DDeps.getOffloadKinds()[i], DDeps.getBoundArchs()[i], DDeps.getToolChains()[i]); + // If this action is used to forward single dependency, set the toolchain. + if (DDeps.getActions().size() == 1) + OffloadingToolChain = DDeps.getToolChains()[i]; } + } } void OffloadAction::doOnHostDependence(const OffloadActionWorkTy &Work) const { diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4436,6 +4436,9 @@ if (A->getType() == types::TY_Nothing) continue; + // Propagate the ToolChain so we can use it in ConstructPhaseAction. + A->propagateDeviceOffloadInfo(Kind, TCAndArch->second.data(), + TCAndArch->first); A = ConstructPhaseAction(C, Args, Phase, A, Kind); if (isa(A) && isa(HostAction) && @@ -4452,6 +4455,7 @@ DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); A = C.MakeAction(HDep, DDep); } + ++TCAndArch; } } @@ -4623,9 +4627,12 @@ return C.MakeAction(Input, Output); } if (Args.hasArg(options::OPT_emit_llvm) || - (TargetDeviceOffloadKind == Action::OFK_HIP && - Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, - false))) { + (((Input->getOffloadingToolChain() && + Input->getOffloadingToolChain()->getTriple().isAMDGPU()) || + TargetDeviceOffloadKind == Action::OFK_HIP) && + (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, + false) || + TargetDeviceOffloadKind == Action::OFK_OpenMP))) { types::ID Output = Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC; return C.MakeAction(Input, Output); @@ -5304,25 +5311,6 @@ if (!T) return {InputInfo()}; - if (BuildingForOffloadDevice && - A->getOffloadingDeviceKind() == Action::OFK_OpenMP) { - if (TC->getTriple().isAMDGCN()) { - // AMDGCN treats backend and assemble actions as no-op because - // linker does not support object files. - if (const BackendJobAction *BA = dyn_cast(A)) { - return BuildJobsForAction(C, *BA->input_begin(), TC, BoundArch, - AtTopLevel, MultipleArchs, LinkingOutput, - CachedResults, TargetDeviceOffloadKind); - } - - if (const AssembleJobAction *AA = dyn_cast(A)) { - return BuildJobsForAction(C, *AA->input_begin(), TC, BoundArch, - AtTopLevel, MultipleArchs, LinkingOutput, - CachedResults, TargetDeviceOffloadKind); - } - } - } - // If we've collapsed action list that contained OffloadAction we // need to build jobs for host/device-side inputs it may have held. for (const auto *OA : CollapsedOffloadActions) @@ -5724,15 +5712,18 @@ // When using both -save-temps and -emit-llvm, use a ".tmp.bc" suffix for // the unoptimized bitcode so that it does not get overwritten by the ".bc" // optimized bitcode output. - auto IsHIPRDCInCompilePhase = [](const JobAction &JA, - const llvm::opt::DerivedArgList &Args) { - // The relocatable compilation in HIP implies -emit-llvm. Similarly, use a - // ".tmp.bc" suffix for the unoptimized bitcode (generated in the compile - // phase.) + auto IsHIPRDCInCompilePhase = [&C](const JobAction &JA, + const llvm::opt::DerivedArgList &Args) { + // The relocatable compilation in HIP and OpenMP implies -emit-llvm. + // Similarly, use a ".tmp.bc" suffix for the unoptimized bitcode + // (generated in the compile phase.) + const ToolChain *TC = JA.getOffloadingToolChain(); return isa(JA) && - JA.getOffloadingDeviceKind() == Action::OFK_HIP && - Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, - false); + ((JA.getOffloadingDeviceKind() == Action::OFK_HIP && + Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, + false)) || + (JA.getOffloadingDeviceKind() == Action::OFK_OpenMP && TC && + TC->getTriple().isAMDGPU())); }; if (!AtTopLevel && JA.getType() == types::TY_LLVM_BC && (C.getArgs().hasArg(options::OPT_emit_llvm) || diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c --- a/clang/test/Driver/amdgpu-openmp-toolchain.c +++ b/clang/test/Driver/amdgpu-openmp-toolchain.c @@ -1,9 +1,9 @@ // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa \ +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ // RUN: -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \ // RUN: | FileCheck %s -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa \ +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa \ // RUN: --offload-arch=gfx906 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib %s 2>&1 \ // RUN: | FileCheck %s @@ -13,7 +13,7 @@ // CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj" // CHECK: clang-linker-wrapper{{.*}}"--"{{.*}} "-o" "a.out" -// RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \ +// RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PHASES %s // CHECK-PHASES: 0: input, "[[INPUT:.+]]", c, (host-openmp) // CHECK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp) @@ -22,39 +22,50 @@ // CHECK-PHASES: 4: preprocessor, {3}, cpp-output, (device-openmp) // CHECK-PHASES: 5: compiler, {4}, ir, (device-openmp) // CHECK-PHASES: 6: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (amdgcn-amd-amdhsa)" {5}, ir -// CHECK-PHASES: 7: backend, {6}, assembler, (device-openmp) -// CHECK-PHASES: 8: assembler, {7}, object, (device-openmp) -// CHECK-PHASES: 9: offload, "device-openmp (amdgcn-amd-amdhsa)" {8}, object -// CHECK-PHASES: 10: clang-offload-packager, {9}, image -// CHECK-PHASES: 11: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (x86_64-unknown-linux-gnu)" {10}, ir -// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp) -// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp) -// CHECK-PHASES: 14: clang-linker-wrapper, {13}, image, (host-openmp) +// CHECK-PHASES: 7: backend, {6}, ir, (device-openmp) +// CHECK-PHASES: 8: offload, "device-openmp (amdgcn-amd-amdhsa)" {7}, ir +// CHECK-PHASES: 9: clang-offload-packager, {8}, image, (device-openmp) +// CHECK-PHASES: 10: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (x86_64-unknown-linux-gnu)" {9}, ir +// CHECK-PHASES: 11: backend, {10}, assembler, (host-openmp) +// CHECK-PHASES: 12: assembler, {11}, object, (host-openmp) +// CHECK-PHASES: 13: clang-linker-wrapper, {12}, image, (host-openmp) // handling of --libomptarget-amdgpu-bc-path -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 --libomptarget-amdgpu-bc-path=%S/Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc %s 2>&1 | FileCheck %s --check-prefix=CHECK-LIBOMPTARGET // CHECK-LIBOMPTARGET: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}Inputs/hip_dev_lib/libomptarget-amdgpu-gfx803.bc"{{.*}} -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOGPULIB +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOGPULIB // CHECK-NOGPULIB-NOT: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx803" "-fcuda-is-device" "-mlink-builtin-bitcode"{{.*}}libomptarget-amdgpu-gfx803.bc"{{.*}} -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]" // CHECK-BINDINGS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.+]]" // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_BC]]"], output: "[[BINARY:.+]]" // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]" // CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -save-temps -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS-TEMPS +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -save-temps -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa --offload-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS-TEMPS +// CHECK-BINDINGS-TEMPS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_PP:.+]]" +// CHECK-BINDINGS-TEMPS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_PP]]"], output: "[[HOST_BC:.+]]" +// CHECK-BINDINGS-TEMPS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], output: "[[DEVICE_PP:.+]]" +// CHECK-BINDINGS-TEMPS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[DEVICE_PP]]", "[[HOST_BC]]"], output: "[[DEVICE_TEMP_BC:.+]]" +// CHECK-BINDINGS-TEMPS: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[DEVICE_TEMP_BC]]"], output: "[[DEVICE_BC:.+]]" +// CHECK-BINDINGS-TEMPS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_BC]]"], output: "[[DEVICE_IMAGE:.+]]" +// CHECK-BINDINGS-TEMPS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_IMAGE]]"], output: "[[HOST_ASM:.+]]" +// CHECK-BINDINGS-TEMPS: "x86_64-unknown-linux-gnu" - "clang::as", inputs: ["[[HOST_ASM]]"], output: "[[HOST_OBJ:.+]]" +// CHECK-BINDINGS-TEMPS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx803 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR // CHECK-EMIT-LLVM-IR: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm" -// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp=libomp --offload-arch=gfx803 \ +// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx803 \ // RUN: --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode -fopenmp-new-driver %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE // CHECK-LIB-DEVICE: "-cc1" {{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc" -// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp=libomp --offload-arch=gfx803 -nogpulib \ +// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx803 -nogpulib \ // RUN: --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode -fopenmp-new-driver %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NOGPULIB // CHECK-LIB-DEVICE-NOGPULIB-NOT: "-cc1" {{.*}}ocml.bc"{{.*}}ockl.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"