diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4478,6 +4478,15 @@ } } + // Compiling HIP in non-RDC mode requires linking each action individually. + for (Action *&A : DeviceActions) { + if (A->getType() != types::TY_Object || Kind != Action::OFK_HIP || + Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) + continue; + ActionList LinkerInput = {A}; + A = C.MakeAction(LinkerInput, types::TY_Image); + } + auto TCAndArch = TCAndArchs.begin(); for (Action *A : DeviceActions) { DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); @@ -4497,12 +4506,21 @@ OffloadAction::DeviceDependences DDep; if (C.isOffloadingHostKind(Action::OFK_Cuda) && !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) { - // If we are not in RDC-mode we just emit the final CUDA fatbinary for each - // translation unit without requiring any linking. + // If we are not in RDC-mode we just emit the final CUDA fatbinary for + // each translation unit without requiring any linking. Action *FatbinAction = C.MakeAction(OffloadActions, types::TY_CUDA_FATBIN); DDep.add(*FatbinAction, *C.getSingleOffloadToolChain(), nullptr, Action::OFK_Cuda); + } else if (C.isOffloadingHostKind(Action::OFK_HIP) && + !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, + false)) { + // If we are not in RDC-mode we just emit the final HIP fatbinary for each + // translation unit, linking each input individually. + Action *FatbinAction = + C.MakeAction(OffloadActions, types::TY_HIP_FATBIN); + DDep.add(*FatbinAction, *C.getSingleOffloadToolChain(), + nullptr, Action::OFK_HIP); } else { // Package all the offloading actions into a single output that can be // embedded in the host and linked. @@ -4511,6 +4529,7 @@ DDep.add(*PackagerAction, *C.getSingleOffloadToolChain(), nullptr, Action::OFK_None); } + OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), /*BoundArch=*/nullptr, isa(HostAction) ? DDep : DDeps); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7006,7 +7006,7 @@ CmdArgs.push_back("-fcuda-include-gpubinary"); CmdArgs.push_back(CudaDeviceInput->getFilename()); } else if (!HostOffloadingInputs.empty()) { - if (IsCuda && !IsRDCMode) { + if ((IsCuda || IsHIP) && !IsRDCMode) { assert(HostOffloadingInputs.size() == 1 && "Only one input expected"); CmdArgs.push_back("-fcuda-include-gpubinary"); CmdArgs.push_back(HostOffloadingInputs.front().getFilename()); diff --git a/clang/test/Driver/hip-binding.hip b/clang/test/Driver/hip-binding.hip --- a/clang/test/Driver/hip-binding.hip +++ b/clang/test/Driver/hip-binding.hip @@ -4,6 +4,9 @@ // RUN: %clang -ccc-print-bindings -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ // RUN: -c 2>&1 | FileCheck -check-prefix=NRDCS %s +// RUN: %clang -ccc-print-bindings -target x86_64-linux-gnu --offload-new-driver \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: -c 2>&1 | FileCheck -check-prefix=NRDCS %s // NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[OBJ1:.*o]]" // NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ1]]"], output: "[[IMG1:.*]]" // NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[OBJ2:.*o]]" @@ -19,6 +22,14 @@ // RDCS: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[IN]]"], output: "[[HOSTOBJ:.*o]]" // RDCS: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[BC1]]", "[[BC2]]", "[[HOSTOBJ]]"], output: "{{.*}}" +// RUN: %clang -ccc-print-bindings -target x86_64-linux-gnu --offload-new-driver \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ +// RUN: -c -fgpu-rdc 2>&1 | FileCheck -check-prefix=RDCS-NEW %s +// RDCS-NEW: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HIP803:.+]]" +// RDCS-NEW: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]"], output: "[[HIP900:.+]]" +// RDCS-NEW: # "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[HIP803]]", "[[HIP900]]"], output: "[[HIPFB:.+]]" +// RDCS-NEW: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[HIPFB]]"], output: "{{.*}}" + // RUN: touch %t.o // RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\ diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip --- a/clang/test/Driver/hip-phases.hip +++ b/clang/test/Driver/hip-phases.hip @@ -11,7 +11,10 @@ // // RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=BIN,NRD %s +// RUN: | FileCheck -check-prefixes=BIN,NRD,OLD %s +// RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=BIN,NRD,NEW %s // // RUN: %clang -x hip -target x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \ @@ -38,7 +41,8 @@ // RDC-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, object // NRD-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]]) // NRD-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]]) -// NRD-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]]) +// OLD-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]]) +// NEW-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, image, (host-[[T]]) // RDC-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]]) // diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip --- a/clang/test/Driver/hip-toolchain-no-rdc.hip +++ b/clang/test/Driver/hip-toolchain-no-rdc.hip @@ -21,6 +21,16 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck -check-prefixes=CHECK %s +// RUN: %clang -### -target x86_64-linux-gnu -fno-gpu-rdc \ +// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ +// RUN: --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \ +// RUN: -fuse-ld=lld -nogpuinc --offload-new-driver -c \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck -check-prefixes=CHECK %s + // RUN: touch %T/a.o // RUN: touch %T/b.o // RUN: %clang -### -target x86_64-linux-gnu \