Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -3110,7 +3110,7 @@ // We will pass the device action as a host dependence, so we don't // need to do anything else with them. CudaDeviceActions.clear(); - return ABRT_Success; + return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success; } // By default, we produce an action for each device arch. @@ -3143,6 +3143,7 @@ assert(DeviceLinkerInputs.size() == GpuArchList.size() && "Linker inputs and GPU arch list sizes do not match."); + ActionList Actions; // Append a new link action for each device. unsigned I = 0; for (auto &LI : DeviceLinkerInputs) { @@ -3154,22 +3155,29 @@ OffloadAction::DeviceDependences DeviceLinkDeps; DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I], AssociatedOffloadKind); - AL.push_back(C.MakeAction(DeviceLinkDeps, - DeviceLinkAction->getType())); + Actions.push_back(C.MakeAction( + DeviceLinkDeps, DeviceLinkAction->getType())); ++I; } DeviceLinkerInputs.clear(); // Create a host object from all the device images by embedding them - // in a fat binary. + // in a fat binary for mixed host-device compilation. For device-only + // compilation, creates a fat binary. OffloadAction::DeviceDependences DDeps; - auto *TopDeviceLinkAction = - C.MakeAction(AL, types::TY_Object); - DDeps.add(*TopDeviceLinkAction, *ToolChains[0], - nullptr, AssociatedOffloadKind); - - // Offload the host object to the host linker. - AL.push_back(C.MakeAction(DDeps, TopDeviceLinkAction->getType())); + if (!CompileDeviceOnly || !BundleOutput.hasValue() || + BundleOutput.getValue()) { + auto *TopDeviceLinkAction = C.MakeAction( + Actions, + CompileDeviceOnly ? types::TY_HIP_FATBIN : types::TY_Object); + DDeps.add(*TopDeviceLinkAction, *ToolChains[0], nullptr, + AssociatedOffloadKind); + // Offload the host object to the host linker. 
+ AL.push_back( + C.MakeAction(DDeps, TopDeviceLinkAction->getType())); + } else { + AL.append(Actions); + } } Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); } @@ -3556,15 +3564,18 @@ return false; } - Action* makeHostLinkAction() { - // Build a list of device linking actions. - ActionList DeviceAL; + void appendDeviceLinkActions(ActionList &AL) { for (DeviceActionBuilder *SB : SpecializedBuilders) { if (!SB->isValid()) continue; - SB->appendLinkDeviceActions(DeviceAL); + SB->appendLinkDeviceActions(AL); } + } + Action *makeHostLinkAction() { + // Build a list of device linking actions. + ActionList DeviceAL; + appendDeviceLinkActions(DeviceAL); if (DeviceAL.empty()) return nullptr; @@ -3893,6 +3904,13 @@ } // Add a link action if necessary. + + if (LinkerInputs.empty()) { + Arg *FinalPhaseArg; + if (getFinalPhase(Args, &FinalPhaseArg) == phases::Link) + OffloadBuilder.appendDeviceLinkActions(Actions); + } + if (!LinkerInputs.empty()) { if (Action *Wrapper = OffloadBuilder.makeHostLinkAction()) LinkerInputs.push_back(Wrapper); Index: clang/test/Driver/hip-phases.hip =================================================================== --- clang/test/Driver/hip-phases.hip +++ clang/test/Driver/hip-phases.hip @@ -311,22 +311,36 @@ // // RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %T/obj1.o %T/obj2.o \ -// RUN: -fgpu-rdc 2>&1 | FileCheck -check-prefixes=L2,RL2 %s +// RUN: -fgpu-rdc 2>&1 | FileCheck -check-prefixes=L2,RL2,RL2-EM %s // -// L2-DAG: [[P0:[0-9]+]]: input, "{{.*}}obj1.o", object, (host-[[T:hip]]) -// RL2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, object, (host-[[T]]) -// L2-DAG: [[P2:[0-9]+]]: input, "{{.*}}obj2.o", object, (host-[[T]]) -// RL2-DAG: [[P3:[0-9]+]]: clang-offload-unbundler, {[[P2]]}, object, (host-[[T]]) +// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \ +// RUN: --cuda-gpu-arch=gfx803 
--cuda-gpu-arch=gfx900 %T/obj1.o %T/obj2.o \ +// RUN: -fgpu-rdc --cuda-device-only 2>&1 | FileCheck -check-prefixes=L2,RL2,RL2-DEV %s -// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH1:gfx803]]) +// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %T/obj1.o %T/obj2.o \ +// RUN: -fgpu-rdc --cuda-device-only --no-gpu-bundle-output 2>&1 \ +// RUN: | FileCheck -check-prefixes=L2,RL2,RL2-NB %s + +// L2-DAG: [[P0:[0-9]+]]: input, "{{.*}}obj1.o", object +// RL2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, object +// L2-DAG: [[P2:[0-9]+]]: input, "{{.*}}obj2.o", object +// RL2-DAG: [[P3:[0-9]+]]: clang-offload-unbundler, {[[P2]]}, object + +// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T:hip]], [[ARCH1:gfx803]]) // RL2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P4]]}, image // RL2-DAG: [[P6:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH2:gfx900]]) // RL2-DAG: [[P7:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, image -// RL2-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, object, (device-[[T]]) -// RL2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, object - -// NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image, (host-[[T]]) -// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]], [[P9]]}, image, (host-[[T]]) +// RL2-DEV-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, hip-fatbin, (device-[[T]]) +// RL2-DEV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, hip-fatbin +// RL2-EM-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, object, (device-[[T]]) +// RL2-EM-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, object +// RL2-NB-NOT: linker +// RL2-NB-NOT: offload + +// NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image, (host-[[T:hip]]) +// RL2-EM-DAG: [[P4:[0-9]+]]: linker, 
{[[P1]], [[P3]], [[P9]]}, image, (host-[[T]]) +// RL2-DEV-NOT: linker // Test one gpu architectures up to the preprocessor expansion output phase in device-only // compilation mode. no bundle. Index: clang/test/Driver/hip-toolchain-rdc-separate.hip =================================================================== --- clang/test/Driver/hip-toolchain-rdc-separate.hip +++ clang/test/Driver/hip-toolchain-rdc-separate.hip @@ -88,47 +88,66 @@ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ // RUN: -fuse-ld=lld -fgpu-rdc -nogpuinc \ // RUN: %T/a.o %T/b.o \ -// RUN: 2>&1 | FileCheck -check-prefix=LINK %s +// RUN: 2>&1 | FileCheck -check-prefixes=LINK,LINK-HOST-UNBUNDLE,LLD-TMP,LINK-BUNDLE,LINK-EMBED %s -// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" -// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" -// LINK-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs=[[A_OBJ_HOST:.*o]],{{.*o}},{{.*o}}" -// LINK: "-unbundle" "-allow-missing-bundles" +// RUN: %clang --hip-link -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ +// RUN: -fuse-ld=lld -fgpu-rdc -nogpuinc \ +// RUN: %T/a.o %T/b.o --cuda-device-only \ +// RUN: 2>&1 | FileCheck -check-prefixes=LINK,LLD-TMP,LINK-BUNDLE,LINK-NOEMBED %s -// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" -// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" -// LINK-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs=[[B_OBJ_HOST:.*o]],{{.*o}},{{.*o}}" -// LINK: "-unbundle" "-allow-missing-bundles" +// RUN: %clang --hip-link -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ +// RUN: -fuse-ld=lld -fgpu-rdc -nogpuinc \ +// RUN: %T/a.o %T/b.o --cuda-device-only --no-gpu-bundle-output \ +// RUN: 2>&1 | FileCheck -check-prefixes=LINK,LLD-FIN,LINK-NOBUNDLE,LINK-NOEMBED %s + +// LINK-HOST-UNBUNDLE: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// 
LINK-HOST-UNBUNDLE-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// LINK-HOST-UNBUNDLE-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs=[[A_OBJ_HOST:.*o]],{{.*o}},{{.*o}}" +// LINK-HOST-UNBUNDLE: "-unbundle" "-allow-missing-bundles" + +// LINK-HOST-UNBUNDLE: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// LINK-HOST-UNBUNDLE-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" +// LINK-HOST-UNBUNDLE-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs=[[B_OBJ_HOST:.*o]],{{.*o}},{{.*o}}" +// LINK-HOST-UNBUNDLE: "-unbundle" "-allow-missing-bundles" // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" -// LINK-SAME: "-inputs=[[A_O]]" "-outputs={{.*o}},[[A_BC1:.*o]],[[A_BC2:.*o]]" -// LINK: "-unbundle" "-allow-missing-bundles" +// LINK-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs={{.*o}},[[A_BC1:.*o]],[[A_BC2:.*o]]" +// LINK-SAME: "-unbundle" "-allow-missing-bundles" // LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900" -// LINK-SAME: "-inputs=[[B_O]]" "-outputs={{.*o}},[[B_BC1:.*o]],[[B_BC2:.*o]]" -// LINK: "-unbundle" "-allow-missing-bundles" +// LINK-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs={{.*o}},[[B_BC1:.*o]],[[B_BC2:.*o]]" +// LINK-SAME: "-unbundle" "-allow-missing-bundles" // LINK-NOT: "*.llvm-link" // LINK-NOT: ".*opt" // LINK-NOT: ".*llc" // LINK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" -// LINK: "-plugin-opt=mcpu=gfx803" -// LINK-SAME: "-o" "[[IMG_DEV1:.*.out]]" "[[A_BC1]]" "[[B_BC1]]" +// LINK-SAME: "-plugin-opt=mcpu=gfx803" +// LLD-TMP-SAME: "-o" "[[IMG_DEV1:.*.out]]" +// LLD-FIN-SAME: "-o" "[[IMG_DEV1:a.out-.*gfx803]]" +// LINK-SAME: "[[A_BC1]]" "[[B_BC1]]" // LINK-NOT: "*.llvm-link" // LINK-NOT: ".*opt" // LINK-NOT:
".*llc" // LINK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" -// LINK: "-plugin-opt=mcpu=gfx900" -// LINK-SAME: "-o" "[[IMG_DEV2:.*.out]]" "[[A_BC2]]" "[[B_BC2]]" - -// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" -// LINK-SAME: "-targets={{.*}},hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900" -// LINK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]" - -// LINK: {{".*llvm-mc.*"}} "-o" "[[OBJBUNDLE:.*o]]" "{{.*}}.mcin" "--filetype=obj" - -// LINK: [[LD:".*ld.*"]] {{.*}} "-o" "a.out" {{.*}} "[[A_OBJ_HOST]]" -// LINK-SAME: "[[B_OBJ_HOST]]" "[[OBJBUNDLE]]" +// LINK-SAME: "-plugin-opt=mcpu=gfx900" +// LLD-TMP-SAME: "-o" "[[IMG_DEV2:.*.out]]" +// LLD-FIN-SAME: "-o" "[[IMG_DEV2:a.out-.*gfx900]]" +// LINK-SAME: "[[A_BC2]]" "[[B_BC2]]" + +// LINK-BUNDLE: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" +// LINK-BUNDLE-SAME: "-targets={{.*}},hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900" +// LINK-BUNDLE-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*]]" +// LINK-NOBUNDLE-NOT: {{".*clang-offload-bundler"}} "-type=o" + +// LINK-EMBED: {{".*llvm-mc.*"}} "-o" "[[OBJBUNDLE:.*o]]" "{{.*}}.mcin" "--filetype=obj" +// LINK-NOEMBED-NOT: {{".*llvm-mc.*"}} "-o" + +// LINK-EMBED: [[LD:".*ld.*"]] {{.*}} "-o" "a.out" {{.*}} "[[A_OBJ_HOST]]" +// LINK-EMBED-SAME: "[[B_OBJ_HOST]]" "[[OBJBUNDLE]]" +// LINK-NOEMBED-NOT: {{".*ld.*"}} {{.*}} "-o" "a.out"