Index: lib/AST/ItaniumMangle.cpp
===================================================================
--- lib/AST/ItaniumMangle.cpp
+++ lib/AST/ItaniumMangle.cpp
@@ -484,6 +484,14 @@
   if (!Context.shouldMangleDeclName(FD))
     return;
 
+  // CUDA __host__ __device__ functions co-exist with both __host__ and
+  // __device__ functions, so they need a different mangled name. We sort
+  // "device", "host", and "enable_if" attrs alphabetically.
+  bool IsCudaHostDevice =
+      FD->hasAttr<CUDAHostAttr>() && FD->hasAttr<CUDADeviceAttr>();
+  if (IsCudaHostDevice)
+    Out << "Ua6device";
+
   if (FD->hasAttr<EnableIfAttr>()) {
     FunctionTypeDepthState Saved = FunctionTypeDepth.push();
     Out << "Ua9enable_ifI";
@@ -503,6 +511,9 @@
     FunctionTypeDepth.pop(Saved);
   }
 
+  if (IsCudaHostDevice)
+    Out << "Ua4host";
+
   // Whether the mangling of a function type includes the return type depends on
   // the context and the nature of the function. The rules for deciding whether
   // the return type is included are:
Index: test/CodeGenCUDA/convergent.cu
===================================================================
--- test/CodeGenCUDA/convergent.cu
+++ test/CodeGenCUDA/convergent.cu
@@ -17,23 +17,23 @@
 
 // HOST: Function Attrs:
 // HOST-NOT: convergent
-// HOST-NEXT: define void @_Z3barv
+// HOST-NEXT: define void @_Z3barUa6deviceUa4hostv
 // DEVICE: Function Attrs:
 // DEVICE-SAME: convergent
-// DEVICE-NEXT: define void @_Z3barv
+// DEVICE-NEXT: define void @_Z3barUa6deviceUa4hostv
 __host__ __device__ void baz();
 __host__ __device__ void bar() {
-  // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
+  // DEVICE: call void @_Z3bazUa6deviceUa4hostv() [[CALL_ATTR:#[0-9]+]]
   baz();
 }
 
-// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// DEVICE: declare void @_Z3bazUa6deviceUa4hostv() [[BAZ_ATTR:#[0-9]+]]
 // DEVICE: attributes [[BAZ_ATTR]] = {
 // DEVICE-SAME: convergent
 // DEVICE-SAME: }
 // DEVICE: attributes [[CALL_ATTR]] = { convergent }
 
-// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// HOST: declare void @_Z3bazUa6deviceUa4hostv() [[BAZ_ATTR:#[0-9]+]]
 // HOST: attributes [[BAZ_ATTR]] = {
 // HOST-NOT: convergent
 // NOST-SAME: }
Index: test/CodeGenCUDA/device-var-init.cu
===================================================================
--- test/CodeGenCUDA/device-var-init.cu
+++ test/CodeGenCUDA/device-var-init.cu
@@ -375,14 +375,14 @@
 // CHECK: call void @_ZN2UCC1Ev(%struct.UC* %uc)
 // CHECK: call void @_ZN3ECIC1Ev(%struct.ECI* %eci)
 // CHECK: call void @_ZN3NECC1Ev(%struct.NEC* %nec)
-// CHECK: call void @_ZN3NCVC1Ev(%struct.NCV* %ncv)
-// CHECK: call void @_ZN3NCFC1Ev(%struct.NCF* %ncf)
-// CHECK: call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs)
+// CHECK: call void @_ZN3NCVC1EUa6deviceUa4hostv(%struct.NCV* %ncv)
+// CHECK: call void @_ZN3NCFC1EUa6deviceUa4hostv(%struct.NCF* %ncf)
+// CHECK: call void @_ZN4NCFSC1EUa6deviceUa4hostv(%struct.NCFS* %ncfs)
 // CHECK: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc)
 // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc)
 // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec)
 // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1)
-// CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t)
+// CHECK: call void @_ZN5T_V_TC1EUa6deviceUa4hostv(%struct.T_V_T* %t_v_t)
 // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec)
 // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec)
 // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
Index: test/CodeGenCUDA/function-overload.cu
===================================================================
--- test/CodeGenCUDA/function-overload.cu
+++ test/CodeGenCUDA/function-overload.cu
@@ -35,9 +35,9 @@
   s_cd_dh scddh;
   // CHECK-BOTH: call void @_ZN7s_cd_dhC1Ev(
   s_cd_hd scdhd;
-  // CHECK-BOTH: call void @_ZN7s_cd_hdC1Ev
+  // CHECK-BOTH: call void @_ZN7s_cd_hdC1EUa6deviceUa4hostv
 
-  // CHECK-BOTH: call void @_ZN7s_cd_hdD1Ev(
+  // CHECK-BOTH: call void @_ZN7s_cd_hdD1EUa6deviceUa4hostv(
   // CHECK-BOTH: call void @_ZN7s_cd_dhD1Ev(
 }
 // CHECK-BOTH: ret void
@@ -49,11 +49,11 @@
 // CHECK-DEVICE: store i32 12,
 // CHECK-BOTH: ret void
 
-// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdC2Ev(
+// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdC2EUa6deviceUa4hostv(
 // CHECK-BOTH: store i32 31,
 // CHECK-BOTH: ret void
 
-// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2Ev(
+// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2EUa6deviceUa4hostv(
 // CHECK-BOTH: store i32 32,
 // CHECK-BOTH: ret void
 
Index: test/CodeGenCUDA/mangling.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/mangling.cu
@@ -0,0 +1,20 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -fcuda-is-device -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// Check that __host__ __device__ function mangled names explicitly contain
+// "host" and "device" attributes. This is important because HD overloads may
+// coexist with H and D overloads.
+
+// CHECK: define i32 @_Z11host_deviceUa6deviceUa4hostv()
+__host__ __device__ int host_device() { return 0; }
+
+// The enable_if attribute should appear in-between the device and host attrs
+// in the mangled name.
+// CHECK: define i32 @_Z8enableifUa6deviceUa9enable_if{{.*}}Ua4hostv
+__attribute__((enable_if(1, "")))
+__host__ __device__ int enableif() { return 0; }
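
For reference, a minimal sketch of the name layout the hunks above imply, assuming the "Ua6device"/"Ua4host" scheme added in ItaniumMangle.cpp; the identifiers below are hypothetical and the expected manglings are transcribed from the new CHECK lines rather than verified against a build:

// Illustrative only; not part of the patch. Host-only and device-only
// functions keep their plain Itanium manglings; only __host__ __device__
// functions pick up the vendor-extended qualifiers.
#include "Inputs/cuda.h"

__device__ int dev_only() { return 1; }      // unchanged: _Z8dev_onlyv

// Under this change: _Z2hdUa6deviceUa4hostv -- "Ua6device" is emitted first,
// "Ua4host" last, both before the bare function type ("v": no parameters).
__host__ __device__ int hd() { return 2; }

// Under this change: _Z3hd2Ua6deviceUa9enable_if{{.*}}Ua4hostv, with the
// enable_if condition mangling elided as in mangling.cu; "enable_if" sorts
// alphabetically between "device" and "host".
__attribute__((enable_if(1, "")))
__host__ __device__ int hd2() { return 3; }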