diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -152,6 +152,18 @@ addLinkerCompressDebugSectionsOption(TC, Args, LldArgs); + // Given that host and device linking happen in separate processes, the device + // linker doesn't always have the visibility as to which device symbols are + // needed by a program, especially for the device symbol dependencies that are + // introduced through the host symbol resolution. + // For example: host_A() (A.obj) --> host_B(B.obj) --> device_kernel_B() + // (B.obj). In this case, the device linker doesn't know that A.obj actually + // depends on the kernel functions in B.obj. When linking to static device + // library, the device linker may drop some of the device global symbols if + // they aren't referenced. As a workaround, we are adding the + // --whole-archive flag such that all global symbols would be linked in. 
+ LldArgs.push_back("--whole-archive"); + for (auto *Arg : Args.filtered(options::OPT_Xoffload_linker)) { LldArgs.push_back(Arg->getValue(1)); Arg->claim(); @@ -169,6 +181,9 @@ /*IsBitCodeSDL=*/true, /*PostClangLink=*/false); + // pair with the --whole-archive being added previously + LldArgs.push_back("--no-whole-archive"); + const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), Lld, LldArgs, Inputs, Output)); diff --git a/clang/test/Driver/hip-toolchain-rdc-separate.hip b/clang/test/Driver/hip-toolchain-rdc-separate.hip --- a/clang/test/Driver/hip-toolchain-rdc-separate.hip +++ b/clang/test/Driver/hip-toolchain-rdc-separate.hip @@ -126,18 +126,22 @@ // LINK-NOT: ".*llc" // LINK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" // LINK-SAME: "-plugin-opt=mcpu=gfx803" +// LINK-SAME: "--whole-archive" // LLD-TMP-SAME: "-o" "[[IMG_DEV1:.*.out]]" // LLD-FIN-SAME: "-o" "[[IMG_DEV1:a.out-.*gfx803]]" // LINK-SAME "[[A_BC1]]" "[[B_BC1]]" +// LINK-SAME: "--no-whole-archive" // LINK-NOT: "*.llvm-link" // LINK-NOT: ".*opt" // LINK-NOT: ".*llc" // LINK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" // LINK-SAME: "-plugin-opt=mcpu=gfx900" +// LINK-SAME: "--whole-archive" // LLD-TMP-SAME: "-o" "[[IMG_DEV2:.*.out]]" // LLD-FIN-SAME: "-o" "[[IMG_DEV1:a.out-.*gfx900]]" // LINK-SAME "[[A_BC2]]" "[[B_BC2]]" +// LINK-SAME: "--no-whole-archive" // LINK-BUNDLE: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // LINK-BUNDLE-SAME: "-targets={{.*}},hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900" diff --git a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip --- a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip +++ b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip @@ -80,7 +80,9 @@ // CHECK-NOT: ".*llc" // CHECK: [[LLD]] {{.*}} "-plugin-opt=-amdgpu-internalize-symbols" // CHECK-SAME: 
"-plugin-opt=mcpu=gfx900" +// CHECK-SAME: "--whole-archive" // CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[A_BC2]] [[B_BC2]] +// CHECK-SAME: "--no-whole-archive" // combine images generated into hip fat binary object // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"