diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -152,6 +152,18 @@ addLinkerCompressDebugSectionsOption(TC, Args, LldArgs); + // Given that host and device linking happen in separate processes, the device + // linker doesn't always have visibility into which device symbols are + // needed by a program, especially for the device symbol dependencies that are + // introduced through the host symbol resolution. + // For example: host_A() (A.obj) --> host_B() (B.obj) --> device_kernel_B() (B.obj) + // In this case, the device linker doesn't know that A.obj actually depends on + // the kernel functions in B.obj. When linking to a static device library, the + // device linker may drop some of the device global symbols if they aren't + // referenced. As a workaround, we add the --whole-archive flag so + // that all global symbols are linked in. 
+ LldArgs.push_back("--whole-archive"); + for (auto *Arg : Args.filtered(options::OPT_Xoffload_linker)) { LldArgs.push_back(Arg->getValue(1)); Arg->claim(); diff --git a/clang/test/Driver/hip-toolchain-rdc-separate.hip b/clang/test/Driver/hip-toolchain-rdc-separate.hip --- a/clang/test/Driver/hip-toolchain-rdc-separate.hip +++ b/clang/test/Driver/hip-toolchain-rdc-separate.hip @@ -126,6 +126,7 @@ // LINK-NOT: ".*llc" // LINK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" // LINK-SAME: "-plugin-opt=mcpu=gfx803" +// LINK-SAME: "--whole-archive" // LLD-TMP-SAME: "-o" "[[IMG_DEV1:.*.out]]" // LLD-FIN-SAME: "-o" "[[IMG_DEV1:a.out-.*gfx803]]" // LINK-SAME "[[A_BC1]]" "[[B_BC1]]" @@ -135,10 +136,12 @@ // LINK-NOT: ".*llc" // LINK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" // LINK-SAME: "-plugin-opt=mcpu=gfx900" +// LINK-SAME: "--whole-archive" // LLD-TMP-SAME: "-o" "[[IMG_DEV2:.*.out]]" // LLD-FIN-SAME: "-o" "[[IMG_DEV1:a.out-.*gfx900]]" // LINK-SAME "[[A_BC2]]" "[[B_BC2]]" + // LINK-BUNDLE: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-BUNDLE-SAME: "-targets={{.*}},hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900" // LINK-BUNDLE-SAME: "-input={{.*}}" "-input=[[IMG_DEV1]]" "-input=[[IMG_DEV2]]" "-output=[[BUNDLE:.*]]" diff --git a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip --- a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip +++ b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip @@ -80,6 +80,7 @@ // CHECK-NOT: ".*llc" // CHECK: [[LLD]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" // CHECK-SAME: "-plugin-opt=mcpu=gfx900" +// CHECK-SAME: "--whole-archive" // CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[A_BC2]] [[B_BC2]] // combine images generated into hip fat binary object