diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -3065,7 +3065,7 @@
 
       // amdgcn does not support linking of object files, therefore we skip
       // backend and assemble phases to output LLVM IR. Except for generating
-      // non-relocatable device coee, where we generate fat binary for device
+      // non-relocatable device code, where we generate fat binary for device
       // code and pass to host in Backend phase.
       if (CudaDeviceActions.empty())
         return ABRT_Success;
@@ -3074,7 +3074,7 @@
               CudaDeviceActions.size() == GpuArchList.size()) &&
              "Expecting one action per GPU architecture.");
       assert(!CompileHostOnly &&
-             "Not expecting CUDA actions in host-only compilation.");
+             "Not expecting HIP actions in host-only compilation.");
 
       if (!Relocatable && CurPhase == phases::Backend && !EmitLLVM &&
           !EmitAsm) {
@@ -3203,12 +3203,16 @@
              "Linker inputs and GPU arch list sizes do not match.");
 
       ActionList Actions;
-      // Append a new link action for each device.
       unsigned I = 0;
+      // Append a new link action for each device.
+      // Each entry in DeviceLinkerInputs corresponds to a GPU arch.
       for (auto &LI : DeviceLinkerInputs) {
-        // Each entry in DeviceLinkerInputs corresponds to a GPU arch.
-        auto *DeviceLinkAction =
-            C.MakeAction<LinkJobAction>(LI, types::TY_Image);
+
+        types::ID Output = Args.hasArg(options::OPT_emit_llvm)
+                                   ? types::TY_LLVM_BC
+                                   : types::TY_Image;
+
+        auto *DeviceLinkAction = C.MakeAction<LinkJobAction>(LI, Output);
         // Linking all inputs for the current GPU arch.
         // LI contains all the inputs for the linker.
         OffloadAction::DeviceDependences DeviceLinkDeps;
@@ -3220,6 +3224,12 @@
       }
       DeviceLinkerInputs.clear();
 
+      // If emitting LLVM, do not generate final host/device compilation action.
+      if (Args.hasArg(options::OPT_emit_llvm)) {
+          AL.append(Actions);
+          return;
+      }
+
       // Create a host object from all the device images by embedding them
       // in a fat binary for mixed host-device compilation. For device-only
       // compilation, creates a fat binary.
@@ -3747,7 +3757,8 @@
   phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
 
   if (FinalPhase == phases::Link) {
-    if (Args.hasArg(options::OPT_emit_llvm))
+    // -emit-llvm is only allowed with linking when --hip-link is given.
+    if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link))
       Diag(clang::diag::err_drv_emit_llvm_link);
     if (IsCLMode() && LTOMode != LTOK_None &&
         !Args.getLastArgValue(options::OPT_fuse_ld_EQ)
@@ -3932,7 +3943,10 @@
       // Queue linker inputs.
       if (Phase == phases::Link) {
         assert(Phase == PL.back() && "linking must be final compilation step.");
-        LinkerInputs.push_back(Current);
+        // With --hip-link -emit-llvm, no additional link command is needed.
+        if (!(C.getInputArgs().hasArg(options::OPT_hip_link) &&
+             (C.getInputArgs().hasArg(options::OPT_emit_llvm))))
+          LinkerInputs.push_back(Current);
         Current = nullptr;
         break;
       }
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h
--- a/clang/lib/Driver/ToolChains/HIPAMD.h
+++ b/clang/lib/Driver/ToolChains/HIPAMD.h
@@ -36,6 +36,10 @@
   void constructLldCommand(Compilation &C, const JobAction &JA,
                            const InputInfoList &Inputs, const InputInfo &Output,
                            const llvm::opt::ArgList &Args) const;
+  void constructLlvmLinkCommand(Compilation &C, const JobAction &JA,
+                                const InputInfoList &Inputs,
+                                const InputInfo &Output,
+                                const llvm::opt::ArgList &Args) const;
 };
 
 } // end namespace AMDGCN
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -72,6 +72,36 @@
   return false;
 }
 
+/// Add an llvm-link command to \p C that links the files in \p Inputs, plus
+/// any device-library bitcode found via \p Args, into the file \p Output.
+void AMDGCN::Linker::constructLlvmLinkCommand(
+    Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
+    const InputInfo &Output, const llvm::opt::ArgList &Args) const {
+  // Construct llvm-link command.
+  // The output from llvm-link is a bitcode file.
+  ArgStringList LlvmLinkArgs;
+
+  assert(!Inputs.empty() && "Must have at least one input.");
+
+  LlvmLinkArgs.append({"-o", Output.getFilename()});
+  for (const auto &Input : Inputs)
+    LlvmLinkArgs.push_back(Input.getFilename());
+
+  // Look for archive of bundled bitcode in arguments, and add temporary files
+  // for the extracted archive of bitcode to inputs.
+  auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
+  AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LlvmLinkArgs, "amdgcn",
+                             TargetID,
+                             /*IsBitCodeSDL=*/true,
+                             /*PostClangLink=*/false);
+
+  const char *LlvmLink =
+      Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
+  C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
+                                         LlvmLink, LlvmLinkArgs, Inputs,
+                                         Output));
+}
+
 void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
                                          const InputInfoList &Inputs,
                                          const InputInfo &Output,
@@ -135,7 +165,8 @@
 }
 
 // For amdgcn the inputs of the linker job are device bitcode and output is
-// object file. It calls llvm-link, opt, llc, then lld steps.
+// either an object file or bitcode (-emit-llvm). For bitcode output only
+// llvm-link is run; otherwise it calls llvm-link, opt, llc, then lld steps.
 void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
                                   const InputInfoList &Inputs,
@@ -151,6 +182,9 @@
     return HIP::constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs,
                                           Args, *this);
 
+  if (JA.getType() == types::TY_LLVM_BC)
+    return constructLlvmLinkCommand(C, JA, Inputs, Output, Args);
+
   return constructLldCommand(C, JA, Inputs, Output, Args);
 }
 
diff --git a/clang/test/Driver/hip-link-bc-to-bc.hip b/clang/test/Driver/hip-link-bc-to-bc.hip
new file mode 100644
--- /dev/null
+++ b/clang/test/Driver/hip-link-bc-to-bc.hip
@@ -0,0 +1,34 @@
+// REQUIRES: clang-driver, x86-registered-target, amdgpu-registered-target
+
+// Check that clang unbundles the two bitcodes and links via llvm-link
+// RUN: touch %T/bundle1.bc
+// RUN: touch %T/bundle2.bc
+
+// RUN: %clang -### --offload-arch=gfx906 --hip-link \
+// RUN:   -emit-llvm -fgpu-rdc --cuda-device-only \
+// RUN:   %T/bundle1.bc %T/bundle2.bc \
+// RUN:   2>&1 | FileCheck -check-prefix=BITCODE %s
+
+// BITCODE: "{{.*}}clang-offload-bundler" "-type=bc" "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx906" "-input={{.*}}bundle1.bc" "-output=[[B1HOST:.*\.bc]]" "-output=[[B1DEV1:.*\.bc]]" "-unbundle" "-allow-missing-bundles"
+// BITCODE: "{{.*}}clang-{{.*}}" "-o" "[[B1DEV2:.*bundle1-gfx906.bc]]" "-x" "ir" "[[B1DEV1]]"
+
+// BITCODE: "{{.*}}clang-offload-bundler" "-type=bc" "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx906" "-input={{.*}}bundle2.bc" "-output=[[B2HOST:.*\.bc]]" "-output=[[B2DEV1:.*\.bc]]" "-unbundle" "-allow-missing-bundles"
+// BITCODE: "{{.*}}clang-{{.*}}" "-o" "[[B2DEV2:.*bundle2-gfx906.bc]]" "-x" "ir" "[[B2DEV1]]"
+
+// BITCODE: "{{.*}}llvm-link" "-o" "bundle1-hip-amdgcn-amd-amdhsa-gfx906.bc" "[[B1DEV2]]" "[[B2DEV2]]"
+
+// Check that clang unbundles the bitcode and archive and links via llvm-link
+// RUN: touch %T/libhipbundle.a
+// RUN: touch %T/bundle.bc
+
+// RUN: %clang -### --offload-arch=gfx906 --hip-link \
+// RUN:   -emit-llvm -fgpu-rdc --cuda-device-only \
+// RUN:   %T/bundle.bc -L%T -lhipbundle \
+// RUN:   2>&1 | FileCheck -check-prefix=ARCHIVE %s
+
+// ARCHIVE: "{{.*}}clang-offload-bundler" "-type=bc" "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx906" "-input={{.*}}bundle.bc" "-output=[[HOST:.*\.bc]]" "-output=[[DEV1:.*\.bc]]" "-unbundle" "-allow-missing-bundles"
+// ARCHIVE: "{{.*}}clang-{{.*}}" "-o" "[[DEV2:.*\.bc]]" "-x" "ir" "[[DEV1]]"
+
+// ARCHIVE: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}libhipbundle.a" "-targets=hip-amdgcn-amd-amdhsa-gfx906" "-output=[[AR:.*\.a]]" "-allow-missing-bundles" "-hip-openmp-compatible"
+
+// ARCHIVE: "{{.*}}llvm-link" "-o" "bundle-hip-amdgcn-amd-amdhsa-gfx906.bc" "[[DEV2]]" "[[AR]]"
diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip
--- a/clang/test/Driver/hip-phases.hip
+++ b/clang/test/Driver/hip-phases.hip
@@ -520,3 +520,25 @@
 // MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx803)
 // MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx900)
 // MIXED2-NEG-NOT: input, "{{.*}}empty.cpp", c++
+
+// Test HIP bitcode to bitcode linking. Input should be bundled or unbundled bitcode, and
+// output should be unbundled linked bitcode
+
+// RUN: touch %T/bitcodeA.bc
+// RUN: touch %T/bitcodeB.bc
+// RUN: %clang -ccc-print-phases --hip-link -emit-llvm --cuda-device-only \
+// RUN: --offload-arch=gfx906 %T/bitcodeA.bc %T/bitcodeB.bc 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHECK %s
+
+// CHECK: [[A0:[0-9]+]]: input, "{{.*}}bitcodeA.bc", ir
+// CHECK: [[A1:[0-9]+]]: clang-offload-unbundler, {[[A0]]}, ir
+// CHECK: [[A2:[0-9]+]]: compiler, {[[A1]]}, ir, (device-hip, [[ARCH:gfx906]])
+// CHECK: [[A3:[0-9]+]]: backend, {[[A2]]}, ir, (device-hip, [[ARCH]])
+
+// CHECK: [[B0:[0-9]+]]: input, "{{.*}}bitcodeB.bc", ir
+// CHECK: [[B1:[0-9]+]]: clang-offload-unbundler, {[[B0]]}, ir
+// CHECK: [[B2:[0-9]+]]: compiler, {[[B1]]}, ir, (device-hip, [[ARCH]])
+// CHECK: [[B3:[0-9]+]]: backend, {[[B2]]}, ir, (device-hip, [[ARCH]])
+
+// CHECK: [[L0:[0-9]+]]: linker, {[[A3]], [[B3]]}, ir, (device-hip, [[ARCH]])
+// CHECK: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[L0]]}, ir