Index: lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- lib/Basic/Targets/AMDGPU.cpp
+++ lib/Basic/Targets/AMDGPU.cpp
@@ -307,5 +307,16 @@
 }
 
 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
+  assert(HalfFormat == Aux->HalfFormat);
+  assert(FloatFormat == Aux->FloatFormat);
+  assert(DoubleFormat == Aux->DoubleFormat);
+
+  // On x86_64 long double is 80-bit extended precision format, which is
+  // not supported by AMDGPU. 128-bit floating point format is also not
+  // supported by AMDGPU. Therefore keep its own format for these two types.
+  auto SaveLongDoubleFormat = LongDoubleFormat;
+  auto SaveFloat128Format = Float128Format;
   copyAuxTarget(Aux);
+  LongDoubleFormat = SaveLongDoubleFormat;
+  Float128Format = SaveFloat128Format;
 }
Index: test/CodeGenCUDA/types.cu
===================================================================
--- test/CodeGenCUDA/types.cu
+++ test/CodeGenCUDA/types.cu
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple amdgcn -aux-triple x86_64 -fcuda-is-device -emit-llvm %s -o - | FileCheck -check-prefix=DEV %s
+// RUN: %clang_cc1 -triple x86_64 -aux-triple amdgcn -emit-llvm %s -o - | FileCheck -check-prefix=HOST %s
+
+#include "Inputs/cuda.h"
+
+// HOST: @ld_host = global x86_fp80 0xK00000000000000000000
+long double ld_host;
+
+// DEV: @ld_device = addrspace(1) externally_initialized global double 0.000000e+00
+__device__ long double ld_device;