Index: lib/Lex/LiteralSupport.cpp
===================================================================
--- lib/Lex/LiteralSupport.cpp
+++ lib/Lex/LiteralSupport.cpp
@@ -616,8 +616,11 @@
       if (isHalf || isFloat || isLong || isFloat128)
         break; // HF, FF, LF, QF invalid.
 
-      if (PP.getTargetInfo().hasFloat16Type() && s + 2 < ThisTokEnd &&
-          s[1] == '1' && s[2] == '6') {
+      // CUDA host and device may differ in _Float16 support, so always
+      // allow f16 literals in CUDA mode to avoid false diagnostics.
+      // FIXME: Make this check more precise for CUDA.
+      if ((PP.getTargetInfo().hasFloat16Type() || PP.getLangOpts().CUDA) &&
+          s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') {
         s += 2; // success, eat up 2 characters.
         isFloat16 = true;
         continue;
Index: lib/Sema/SemaType.cpp
===================================================================
--- lib/Sema/SemaType.cpp
+++ lib/Sema/SemaType.cpp
@@ -1442,7 +1442,10 @@
       Result = Context.Int128Ty;
     break;
   case DeclSpec::TST_float16:
-    if (!S.Context.getTargetInfo().hasFloat16Type())
+    // CUDA host and device may differ in _Float16 support, so do not
+    // diagnose _Float16 usage in CUDA mode to avoid false errors.
+    // FIXME: Emit more precise diagnostics for CUDA.
+    if (!S.Context.getTargetInfo().hasFloat16Type() && !S.getLangOpts().CUDA)
       S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
         << "_Float16";
     Result = Context.Float16Ty;
Index: test/SemaCUDA/float16.cu
===================================================================
--- test/SemaCUDA/float16.cu
+++ test/SemaCUDA/float16.cu
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s
+// expected-no-diagnostics
+#include "Inputs/cuda.h"
+
+__device__ void f(_Float16 x);
+
+__device__ _Float16 x = 1.0f16;