Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -1050,24 +1050,37 @@
 //
 // FIXME: This provides a sub-optimal error message if you attempt to
 // use this in CUDA, since CUDA does not use the same terminology.
-def AMDGPUNumVGPR : InheritableAttr {
-  let Spellings = [GNU<"amdgpu_num_vgpr">];
-  let Args = [UnsignedArgument<"NumVGPR">];
-  let Documentation = [AMDGPUNumVGPRDocs];
-
+//
 // FIXME: This should be for OpenCLKernelFunction, but is not to
 // workaround needing to see kernel attribute before others to know if
 // this should be rejected on non-kernels.
-  let Subjects = SubjectList<[Function], ErrorDiag,
-                             "ExpectedKernelFunction">;
+
+def AMDGPUFlatWorkGroupSize : InheritableAttr {
+  let Spellings = [GNU<"amdgpu_flat_work_group_size">];
+  let Args = [UnsignedArgument<"Min">, UnsignedArgument<"Max">];
+  let Documentation = [AMDGPUFlatWorkGroupSizeDocs];
+  let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">;
+}
+
+def AMDGPUWavesPerEU : InheritableAttr {
+  let Spellings = [GNU<"amdgpu_waves_per_eu">];
+  let Args = [UnsignedArgument<"Min">, VariadicUnsignedArgument<"Max">];
+  let Documentation = [AMDGPUWavesPerEUDocs];
+  let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">;
 }
 
 def AMDGPUNumSGPR : InheritableAttr {
   let Spellings = [GNU<"amdgpu_num_sgpr">];
   let Args = [UnsignedArgument<"NumSGPR">];
-  let Documentation = [AMDGPUNumSGPRDocs];
-  let Subjects = SubjectList<[Function], ErrorDiag,
-                              "ExpectedKernelFunction">;
+  let Documentation = [AMDGPUNumSGPRNumVGPRDocs];
+  let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">;
+}
+
+def AMDGPUNumVGPR : InheritableAttr {
+  let Spellings = [GNU<"amdgpu_num_vgpr">];
+  let Args = [UnsignedArgument<"NumVGPR">];
+  let Documentation = [AMDGPUNumSGPRNumVGPRDocs];
+  let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">;
 }
 
 def NoSplitStack : InheritableAttr {
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -889,12 +889,12 @@
 enumerator, a non-static data member, or a label.
 
 .. code-block: c++
-  #include <cassert>
-
-  [[maybe_unused]] void f([[maybe_unused]] bool thing1,
-                          [[maybe_unused]] bool thing2) {
-    [[maybe_unused]] bool b = thing1 && thing2;
-    assert(b);
+  #include <cassert>
+
+  [[maybe_unused]] void f([[maybe_unused]] bool thing1,
+                          [[maybe_unused]] bool thing2) {
+    [[maybe_unused]] bool b = thing1 && thing2;
+    assert(b);
   }
   }];
 }
@@ -911,15 +911,15 @@
 `void`.
 
 .. code-block: c++
-  struct [[nodiscard]] error_info { /*...*/ };
-  error_info enable_missile_safety_mode();
-  
-  void launch_missiles();
-  void test_missiles() {
-    enable_missile_safety_mode(); // diagnoses
-    launch_missiles();
-  }
-  error_info &foo();
+  struct [[nodiscard]] error_info { /*...*/ };
+  error_info enable_missile_safety_mode();
+  
+  void launch_missiles();
+  void test_missiles() {
+    enable_missile_safety_mode(); // diagnoses
+    launch_missiles();
+  }
+  error_info &foo();
   void f() { foo(); } // Does not diagnose, error_info is a reference.
   }];
 }
@@ -1076,64 +1076,108 @@
 }];
 }
 
-def DocCatAMDGPURegisterAttributes :
-  DocumentationCategory<"AMD GPU Register Attributes"> {
-  let Content = [{
-Clang supports attributes for controlling register usage on AMD GPU
-targets. These attributes may be attached to a kernel function
-definition and is an optimization hint to the backend for the maximum
-number of registers to use. This is useful in cases where register
-limited occupancy is known to be an important factor for the
-performance for the kernel.
-
-The semantics are as follows:
-
-- The backend will attempt to limit the number of used registers to
-  the specified value, but the exact number used is not
-  guaranteed. The number used may be rounded up to satisfy the
-  allocation requirements or ABI constraints of the subtarget. For
-  example, on Southern Islands VGPRs may only be allocated in
-  increments of 4, so requesting a limit of 39 VGPRs will really
-  attempt to use up to 40. Requesting more registers than the
-  subtarget supports will truncate to the maximum allowed. The backend
-  may also use fewer registers than requested whenever possible.
-
-- 0 implies the default no limit on register usage.
-
-- Ignored on older VLIW subtargets which did not have separate scalar
-  and vector registers, R600 through Northern Islands.
-
-}];
-}
-
-
-def AMDGPUNumVGPRDocs : Documentation {
-  let Category = DocCatAMDGPURegisterAttributes;
-  let Content = [{
-Clang supports the
-``__attribute__((amdgpu_num_vgpr(<num_registers>)))`` attribute on AMD
-Southern Islands GPUs and later for controlling the number of vector
-registers. A typical value would be between 4 and 256 in increments
-of 4.
-}];
-}
+def DocCatAMDGPUAttributes : DocumentationCategory<"AMD GPU Attributes">;
 
-def AMDGPUNumSGPRDocs : Documentation {
-  let Category = DocCatAMDGPURegisterAttributes;
+def AMDGPUFlatWorkGroupSizeDocs : Documentation {
+  let Category = DocCatAMDGPUAttributes;
   let Content = [{
+The flat work-group size is the number of work-items in the work-group size
+specified when the kernel is dispatched. It is the product of the sizes of the
+x, y, and z dimension of the work-group.
 
 Clang supports the
-``__attribute__((amdgpu_num_sgpr(<num_registers>)))`` attribute on AMD
-Southern Islands GPUs and later for controlling the number of scalar
-registers. A typical value would be between 8 and 104 in increments of
-8.
-
-Due to common instruction constraints, an additional 2-4 SGPRs are
-typically required for internal use depending on features used. This
-value is a hint for the total number of SGPRs to use, and not the
-number of user SGPRs, so no special consideration needs to be given
-for these.
-}];
+``__attribute__((amdgpu_flat_work_group_size(<min>, <max>)))`` attribute for the
+AMDGPU target. This attribute may be attached to a kernel function definition
+and is an optimization hint.
+
+``<min>`` parameter specifies the minimum flat work-group size (must be greater
+than or equal to 1), and ``<max>`` parameter specifies the maximum flat
+work-group size (must be less than ``<min>``) to which all dispatches of the
+kernel will conform.
+
+If specified, the AMDGPU target backend might be able to produce better machine
+code for barriers and perform scratch promotion by estimating available group
+segment size.
+
+An error will be given if:
+  - Specified values violate subtarget specifications;
+  - Specified values are not compatible with values provided through other
+    attributes.
+  }];
+}
+
+def AMDGPUWavesPerEUDocs : Documentation {
+  let Category = DocCatAMDGPUAttributes;
+  let Content = [{
+A compute unit (CU) is responsible for executing the wavefronts of a work-group.
+It is composed of one or more execution units (EU), which are responsible for
+executing the wavefronts. An EU can have enough resources to maintain the state
+of more than one executing wavefront. This allows an EU to hide latency by
+switching between wavefronts in a similar way to symmetric multithreading on a
+CPU. In order to allow the state for multiple wavefronts to fit on an EU, the
+resources used by a single wavefront have to be limited. For example, the number
+of SGPRs and VGPRs. Limiting such resources can allow greater latency hiding,
+but can result in having to spill some register state to memory.
+
+Clang supports the ``__attribute__((amdgpu_waves_per_eu(<min>[, <max>])))``
+attribute for the AMDGPU target. This attribute may be attached to a kernel
+function definition and is an optimization hint.
+
+``<min>`` parameter specifies the requested minimum number of waves per EU (must
+be greater than or equal to 1), and *optional* ``<max>`` parameter specifies the
+requested maximum number of waves per EU. If ``<max>`` is omitted, then there is
+no restriction on the maximum number of waves per EU other than the one dictated
+by the hardware for which the kernel is compiled.
+
+If specified, this attribute allows an advanced developer to tune the number of
+wavefronts that are capable of fitting within the resources of an EU. The AMDGPU
+target backend can use this information to limit resources, such as number of
+SGPRs, number of VGPRs, size of available group and private memory segments, in
+such a way that guarantees that at least ``<min>`` wavefronts and at most
+``<max>`` wavefronts are able to fit within the resources of an EU. Requesting
+more wavefronts can  hide memory latency but limits available registers which
+can result in spilling. Requesting fewer wavefronts can help reduce cache
+thrashing, but can reduce memory latency hiding.
+
+This attribute controls the machine code generated by the AMDGPU target backend
+to ensure it is capable of meeting the requested values. However, when the
+kernel is executed there may be other reasons that prevent meeting the request,
+for example, there may be wavefronts from other kernels executing on the EU.
+
+The error will be given if:
+  - Specified values violate subtarget specifications;
+  - Specified values are not compatible with values provided through other
+    attributes;
+  - The AMDGPU target backend is unable to create machine code that can meet the
+    request.
+  }];
+}
+
+def AMDGPUNumSGPRNumVGPRDocs : Documentation {
+  let Category = DocCatAMDGPUAttributes;
+  let Content = [{
+Clang supports the ``__attribute__((amdgpu_num_sgpr(<num_sgpr>)))`` and
+``__attribute__((amdgpu_num_vgpr(<num_vgpr>)))`` attributes for the AMDGPU
+target. These attributes may be attached to a kernel function definition and are
+an optimization hint.
+
+If these attributes are specified, then the AMDGPU target backend will attempt
+to limit the number of SGPRs and/or VGPRs used to the specified value(s). The
+number of used SGPRs and/or VGPRs may further be rounded up to satisfy the
+allocation requirements or constraints of the subtarget.
+
+These attributes can be used to test the AMDGPU target backend. It is
+recommended that the ``amdgpu_waves_per_eu`` attribute be used to control
+resources such as SGPRs and VGPRs since it is aware of the limits for different
+subtargets.
+
+An error will be given if:
+  - Specified values violate subtarget specifications;
+  - Specified values are not compatible with values provided through other
+    attributes;
+  - The AMDGPU target backend is unable to create machine code that can meet the
+    request.
+  }];
 }
 
 def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -2381,6 +2381,8 @@
   "'%0' parameter must have pointer%select{| to unqualified pointer}1 type; "
   "type here is %2">;
 
+def err_attribute_argument_invalid : Error<
+  "%0 attribute parameter is invalid: %1">;
 def err_attribute_argument_is_zero : Error<
   "%0 attribute must be greater than 0">;
 def warn_attribute_argument_n_negative : Warning<
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -6946,25 +6946,57 @@
 static void appendOpenCLVersionMD (CodeGen::CodeGenModule &CGM);
 
 void AMDGPUTargetCodeGenInfo::setTargetAttributes(
-  const Decl *D,
-  llvm::GlobalValue *GV,
-  CodeGen::CodeGenModule &M) const {
+    const Decl *D,
+    llvm::GlobalValue *GV,
+    CodeGen::CodeGenModule &M) const {
   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
   if (!FD)
     return;
 
-  if (const auto Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
-    llvm::Function *F = cast<llvm::Function>(GV);
-    uint32_t NumVGPR = Attr->getNumVGPR();
-    if (NumVGPR != 0)
-      F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR));
+  llvm::Function *F = cast<llvm::Function>(GV);
+
+  if (const auto Attr = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
+    unsigned Min = Attr->getMin();
+    unsigned Max = Attr->getMax();
+
+    if (Min != 0) {
+      assert(Min <= Max && "Min must be less than or equal Max");
+
+      std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
+      F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
+    } else {
+      assert(Max == 0 && "Max must be zero");
+    }
+  }
+
+  if (const auto Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
+    unsigned Min = Attr->getMin();
+    unsigned Max = *Attr->max_begin();
+
+    if (Min != 0) {
+      assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max");
+
+      std::string AttrVal = llvm::utostr(Min);
+      if (Max != 0)
+        AttrVal = AttrVal + "," + llvm::utostr(Max);
+      F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
+    } else {
+      assert(Max == 0 && "Max must be zero");
+    }
   }
 
   if (const auto Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
-    llvm::Function *F = cast<llvm::Function>(GV);
     unsigned NumSGPR = Attr->getNumSGPR();
+
     if (NumSGPR != 0)
-      F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR));
+      F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
+  }
+
+  if (const auto Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
+    uint32_t NumVGPR = Attr->getNumVGPR();
+
+    if (NumVGPR != 0)
+      F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
   }
 
   appendOpenCLVersionMD(M);
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -4941,29 +4941,94 @@
   }
 }
 
-static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D,
+static void handleAMDGPUFlatWorkGroupSizeAttr(Sema &S, Decl *D,
+                                              const AttributeList &Attr) {
+  uint32_t Min = 0;
+  Expr *MinExpr = static_cast<Expr*>(Attr.getArgAsExpr(0));
+  if (!checkUInt32Argument(S, Attr, MinExpr, Min))
+    return;
+
+  uint32_t Max = 0;
+  Expr *MaxExpr = static_cast<Expr*>(Attr.getArgAsExpr(1));
+  if (!checkUInt32Argument(S, Attr, MaxExpr, Max))
+    return;
+
+  if (Min == 0 && Max != 0) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid)
+      << Attr.getName()
+      << "maximum flat work-group size must be zero since minimum flat "
+         "work-group size is zero";
+    return;
+  }
+  if (Min > Max) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid)
+      << Attr.getName()
+      << "minimum flat work-group size must not be greater than maximum flat "
+         "work-group size";
+    return;
+  }
+
+  D->addAttr(::new (S.Context)
+             AMDGPUFlatWorkGroupSizeAttr(Attr.getLoc(), S.Context, Min, Max,
+                                         Attr.getAttributeSpellingListIndex()));
+}
+
+static void handleAMDGPUWavesPerEUAttr(Sema &S, Decl *D,
+                                       const AttributeList &Attr) {
+  uint32_t Min = 0;
+  Expr *MinExpr = static_cast<Expr*>(Attr.getArgAsExpr(0));
+  if (!checkUInt32Argument(S, Attr, MinExpr, Min))
+    return;
+
+  uint32_t Max = 0;
+  if (Attr.getNumArgs() == 2) {
+    Expr *MaxExpr = static_cast<Expr*>(Attr.getArgAsExpr(1));
+    if (!checkUInt32Argument(S, Attr, MaxExpr, Max))
+      return;
+  }
+
+  if (Min == 0 && Max != 0) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid)
+      << Attr.getName()
+      << "maximum number of waves per execution unit must be zero since "
+         "minimum number of waves per execution unit is zero";
+    return;
+  }
+  if (Max != 0 && Min > Max) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid)
+      << Attr.getName()
+      << "minimum number of waves per execution unit must not be greater than "
+         "maximum number of waves per execution unit";
+    return;
+  }
+
+  D->addAttr(::new (S.Context)
+             AMDGPUWavesPerEUAttr(Attr.getLoc(), S.Context,
+                                  Min, &Max, sizeof(Max),
+                                  Attr.getAttributeSpellingListIndex()));
+}
+
+static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D,
                                     const AttributeList &Attr) {
-  uint32_t NumRegs;
-  Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
-  if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs))
+  uint32_t NumSGPR = 0;
+  Expr *NumSGPRExpr = static_cast<Expr*>(Attr.getArgAsExpr(0));
+  if (!checkUInt32Argument(S, Attr, NumSGPRExpr, NumSGPR))
     return;
 
   D->addAttr(::new (S.Context)
-             AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context,
-                               NumRegs,
+             AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context, NumSGPR,
                                Attr.getAttributeSpellingListIndex()));
 }
 
-static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D,
+static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D,
                                     const AttributeList &Attr) {
-  uint32_t NumRegs;
-  Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
-  if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs))
+  uint32_t NumVGPR = 0;
+  Expr *NumVGPRExpr = static_cast<Expr*>(Attr.getArgAsExpr(0));
+  if (!checkUInt32Argument(S, Attr, NumVGPRExpr, NumVGPR))
     return;
 
   D->addAttr(::new (S.Context)
-             AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context,
-                               NumRegs,
+             AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context, NumVGPR,
                                Attr.getAttributeSpellingListIndex()));
 }
 
@@ -5417,12 +5482,18 @@
   case AttributeList::AT_NoMips16:
     handleSimpleAttribute<NoMips16Attr>(S, D, Attr);
     break;
-  case AttributeList::AT_AMDGPUNumVGPR:
-    handleAMDGPUNumVGPRAttr(S, D, Attr);
+  case AttributeList::AT_AMDGPUFlatWorkGroupSize:
+    handleAMDGPUFlatWorkGroupSizeAttr(S, D, Attr);
+    break;
+  case AttributeList::AT_AMDGPUWavesPerEU:
+    handleAMDGPUWavesPerEUAttr(S, D, Attr);
     break;
   case AttributeList::AT_AMDGPUNumSGPR:
     handleAMDGPUNumSGPRAttr(S, D, Attr);
     break;
+  case AttributeList::AT_AMDGPUNumVGPR:
+    handleAMDGPUNumVGPRAttr(S, D, Attr);
+    break;
   case AttributeList::AT_IBAction:
     handleSimpleAttribute<IBActionAttr>(S, D, Attr);
     break;
@@ -5974,7 +6045,11 @@
     } else if (Attr *A = D->getAttr<VecTypeHintAttr>()) {
       Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
       D->setInvalidDecl();
-    } else if (Attr *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
+    } else if (Attr *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
+      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+        << A << ExpectedKernelFunction;
+      D->setInvalidDecl();
+    } else if (Attr *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
       Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
         << A << ExpectedKernelFunction;
       D->setInvalidDecl();
@@ -5982,6 +6057,10 @@
       Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
         << A << ExpectedKernelFunction;
       D->setInvalidDecl();
+    } else if (Attr *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
+      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+        << A << ExpectedKernelFunction;
+      D->setInvalidDecl();
     }
   }
 }
Index: test/CodeGenOpenCL/amdgpu-attrs.cl
===================================================================
--- test/CodeGenOpenCL/amdgpu-attrs.cl
+++ test/CodeGenOpenCL/amdgpu-attrs.cl
@@ -0,0 +1,166 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s
+
+__attribute__((amdgpu_flat_work_group_size(0, 0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0() {}
+__attribute__((amdgpu_waves_per_eu(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0() {}
+__attribute__((amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_0() {}
+__attribute__((amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void num_sgpr0() {}
+__attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void num_vgpr0() {}
+
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_num_sgpr_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_num_vgpr_0() {}
+__attribute__((amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_num_sgpr_0() {}
+__attribute__((amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_num_vgpr_0() {}
+__attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_0_num_sgpr_0() {}
+__attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_0_num_vgpr_0() {}
+__attribute__((amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void num_sgpr_0_num_vgpr_0() {}
+
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_num_vgpr_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_vgpr_0() {}
+
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0_num_vgpr_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0_num_vgpr_0() {}
+
+__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
+kernel void waves_per_eu_2() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2() [[WAVES_PER_EU_2:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics
+kernel void waves_per_eu_2_4() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_4() [[WAVES_PER_EU_2_4:#[0-9]+]]
+}
+__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @num_sgpr_32() [[NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @num_vgpr_64() [[NUM_VGPR_64:#[0-9]+]]
+}
+
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_waves_per_eu_2() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_waves_per_eu_2_4() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void waves_per_eu_2_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_num_sgpr_32() [[WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void waves_per_eu_2_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_num_vgpr_64() [[WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void waves_per_eu_2_4_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_sgpr_32() [[WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void waves_per_eu_2_4_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_vgpr_64() [[WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void num_sgpr_32_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @num_sgpr_32_num_vgpr_64() [[NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
+}
+
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
+kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
+kernel void flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
+kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
+kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]]
+}
+
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
+}
+
+// Make sure this is silently accepted on other targets.
+// X86-NOT: "amdgpu-flat-work-group-size"
+// X86-NOT: "amdgpu-waves-per-eu"
+// X86-NOT: "amdgpu-num-vgpr"
+// X86-NOT: "amdgpu-num-sgpr"
+
+// CHECK-NOT: "amdgpu-flat-work-group-size"="0,0"
+// CHECK-NOT: "amdgpu-waves-per-eu"="0"
+// CHECK-NOT: "amdgpu-waves-per-eu"="0,0"
+// CHECK-NOT: "amdgpu-num-sgpr"="0"
+// CHECK-NOT: "amdgpu-num-vgpr"="0"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { nounwind "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { nounwind "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { nounwind "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { nounwind "amdgpu-num-vgpr"="64"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { nounwind "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { nounwind "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { nounwind "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
Index: test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
===================================================================
--- test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
+++ test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
@@ -1,48 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s
-
-// Make sure this is silently accepted on other targets.
-
-__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
-kernel void test_num_vgpr64() {
-// CHECK: define amdgpu_kernel void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]]
-}
-
-__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics
-kernel void test_num_sgpr32() {
-// CHECK: define amdgpu_kernel void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]]
-}
-
-__attribute__((amdgpu_num_vgpr(64), amdgpu_num_sgpr(32))) // expected-no-diagnostics
-kernel void test_num_vgpr64_sgpr32() {
-// CHECK: define amdgpu_kernel void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]]
-
-}
-
-__attribute__((amdgpu_num_sgpr(20), amdgpu_num_vgpr(40))) // expected-no-diagnostics
-kernel void test_num_sgpr20_vgpr40() {
-// CHECK: define amdgpu_kernel void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]]
-}
-
-__attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics
-kernel void test_num_vgpr0() {
-}
-
-__attribute__((amdgpu_num_sgpr(0))) // expected-no-diagnostics
-kernel void test_num_sgpr0() {
-}
-
-__attribute__((amdgpu_num_vgpr(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
-kernel void test_num_vgpr0_sgpr0() {
-}
-
-
-// X86-NOT: "amdgpu_num_vgpr"
-// X86-NOT: "amdgpu_num_sgpr"
-
-// CHECK-NOT: "amdgpu_num_vgpr"="0"
-// CHECK-NOT: "amdgpu_num_sgpr"="0"
-// CHECK-DAG: attributes [[ATTR_VGPR64]] = { nounwind "amdgpu_num_vgpr"="64"
-// CHECK-DAG: attributes [[ATTR_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32"
-// CHECK-DAG: attributes [[ATTR_VGPR64_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32" "amdgpu_num_vgpr"="64"
-// CHECK-DAG: attributes [[ATTR_SGPR20_VGPR40]] = { nounwind "amdgpu_num_sgpr"="20" "amdgpu_num_vgpr"="40"
Index: test/SemaCUDA/amdgpu-attrs.cu
===================================================================
--- test/SemaCUDA/amdgpu-attrs.cu
+++ test/SemaCUDA/amdgpu-attrs.cu
@@ -0,0 +1,110 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#include "Inputs/cuda.h"
+
+
+// expected-error@+2 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64)))
+__global__ void flat_work_group_size_32_64() {}
+
+// expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2)))
+__global__ void waves_per_eu_2() {}
+
+// expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2, 4)))
+__global__ void waves_per_eu_2_4() {}
+
+// expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_sgpr(32)))
+__global__ void num_sgpr_32() {}
+
+// expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_vgpr(64)))
+__global__ void num_vgpr_64() {}
+
+
+// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2() {}
+
+// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_4() {}
+
+// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_sgpr(32)))
+__global__ void flat_work_group_size_32_64_num_sgpr_32() {}
+
+// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_vgpr(64)))
+__global__ void flat_work_group_size_32_64_num_vgpr_64() {}
+
+// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
+__global__ void waves_per_eu_2_num_sgpr_32() {}
+
+// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
+__global__ void waves_per_eu_2_num_vgpr_64() {}
+
+// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
+__global__ void waves_per_eu_2_4_num_sgpr_32() {}
+
+// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
+__global__ void waves_per_eu_2_4_num_vgpr_64() {}
+
+// expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
+__global__ void num_sgpr_32_num_vgpr_64() {}
+
+
+// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() {}
+
+// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() {}
+
+// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() {}
+
+// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() {}
+
+
+// expected-error@+5 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error@+4 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() {}
+
+// expected-error@+5 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error@+4 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {}
Index: test/SemaCUDA/amdgpu-num-gpr-attr.cu
===================================================================
--- test/SemaCUDA/amdgpu-num-gpr-attr.cu
+++ test/SemaCUDA/amdgpu-num-gpr-attr.cu
@@ -1,14 +0,0 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
-
-#include "Inputs/cuda.h"
-
-__attribute__((amdgpu_num_vgpr(64)))
-__global__ void test_num_vgpr() { } // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
-
-__attribute__((amdgpu_num_sgpr(32)))
-__global__ void test_num_sgpr() { } // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
-
-// fixme-expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
-// expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
-__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
-__global__ void test_num_vgpr_num_sgpr() { }
Index: test/SemaOpenCL/amdgpu-attrs.cl
===================================================================
--- test/SemaOpenCL/amdgpu-attrs.cl
+++ test/SemaOpenCL/amdgpu-attrs.cl
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -triple amdgcn-- -verify -fsyntax-only %s
+
+typedef __attribute__((amdgpu_flat_work_group_size(32, 64))) struct struct_flat_work_group_size_32_64 { // expected-error {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+  int x;
+  float y;
+} struct_flat_work_group_size_32_64;
+typedef __attribute__((amdgpu_waves_per_eu(2))) struct struct_waves_per_eu_2 { // expected-error {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+  int x;
+  float y;
+} struct_waves_per_eu_2;
+typedef __attribute__((amdgpu_waves_per_eu(2, 4))) struct struct_waves_per_eu_2_4 { // expected-error {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+  int x;
+  float y;
+} struct_waves_per_eu_2_4;
+typedef __attribute__((amdgpu_num_sgpr(32))) struct struct_num_sgpr_32 { // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+  int x;
+  float y;
+} struct_num_sgpr_32;
+typedef __attribute__((amdgpu_num_vgpr(64))) struct struct_num_vgpr_64 { // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+  int x;
+  float y;
+} struct_num_vgpr_64;
+
+__attribute__((amdgpu_flat_work_group_size(32, 64))) void func_flat_work_group_size_32_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2))) void func_waves_per_eu_2() {} // expected-error {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2, 4))) void func_waves_per_eu_2_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_sgpr(32))) void func_num_sgpr_32() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_vgpr(64))) void func_num_vgpr_64() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+
+__attribute__((amdgpu_flat_work_group_size("ABC", "ABC"))) kernel void kernel_flat_work_group_size_ABC_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
+__attribute__((amdgpu_flat_work_group_size(32, "ABC"))) kernel void kernel_flat_work_group_size_32_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
+__attribute__((amdgpu_flat_work_group_size("ABC", 64))) kernel void kernel_flat_work_group_size_ABC_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
+__attribute__((amdgpu_waves_per_eu("ABC"))) kernel void kernel_waves_per_eu_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
+__attribute__((amdgpu_waves_per_eu(2, "ABC"))) kernel void kernel_waves_per_eu_2_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
+__attribute__((amdgpu_waves_per_eu("ABC", 4))) kernel void kernel_waves_per_eu_ABC_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
+__attribute__((amdgpu_num_sgpr("ABC"))) kernel void kernel_num_sgpr_ABC() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}}
+__attribute__((amdgpu_num_vgpr("ABC"))) kernel void kernel_num_vgpr_ABC() {} // expected-error {{'amdgpu_num_vgpr' attribute requires an integer constant}}
+
+__attribute__((amdgpu_flat_work_group_size(4294967296, 4294967296))) kernel void kernel_flat_work_group_size_L_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_flat_work_group_size(32, 4294967296))) kernel void kernel_flat_work_group_size_32_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_flat_work_group_size(4294967296, 64))) kernel void kernel_flat_work_group_size_L_64() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_waves_per_eu(4294967296))) kernel void kernel_waves_per_eu_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_waves_per_eu(2, 4294967296))) kernel void kernel_waves_per_eu_2_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_waves_per_eu(4294967296, 4))) kernel void kernel_waves_per_eu_L_4() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_num_sgpr(4294967296))) kernel void kernel_num_sgpr_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_num_vgpr(4294967296))) kernel void kernel_num_vgpr_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+
+__attribute__((amdgpu_flat_work_group_size(0, 64))) kernel void kernel_flat_work_group_size_0_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute parameter is invalid: maximum flat work-group size must be zero since minimum flat work-group size is zero}}
+__attribute__((amdgpu_waves_per_eu(0, 4))) kernel void kernel_waves_per_eu_0_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute parameter is invalid: maximum number of waves per execution unit must be zero since minimum number of waves per execution unit is zero}}
+
+__attribute__((amdgpu_flat_work_group_size(64, 32))) kernel void kernel_flat_work_group_size_64_32() {} // expected-error {{'amdgpu_flat_work_group_size' attribute parameter is invalid: minimum flat work-group size must not be greater than maximum flat work-group size}}
+__attribute__((amdgpu_waves_per_eu(4, 2))) kernel void kernel_waves_per_eu_4_2() {} // expected-error {{'amdgpu_waves_per_eu' attribute parameter is invalid: minimum number of waves per execution unit must not be greater than maximum number of waves per execution unit}}
+
+__attribute__((amdgpu_flat_work_group_size(0, 0))) kernel void kernel_flat_work_group_size_0_0() {}
+__attribute__((amdgpu_waves_per_eu(0))) kernel void kernel_waves_per_eu_0() {}
+__attribute__((amdgpu_waves_per_eu(0, 0))) kernel void kernel_waves_per_eu_0_0() {}
+__attribute__((amdgpu_num_sgpr(0))) kernel void kernel_num_sgpr_0() {}
+__attribute__((amdgpu_num_vgpr(0))) kernel void kernel_num_vgpr_0() {}
+
+kernel __attribute__((amdgpu_flat_work_group_size(32, 64))) void kernel_flat_work_group_size_32_64() {}
+kernel __attribute__((amdgpu_waves_per_eu(2))) void kernel_waves_per_eu_2() {}
+kernel __attribute__((amdgpu_waves_per_eu(2, 4))) void kernel_waves_per_eu_2_4() {}
+kernel __attribute__((amdgpu_num_sgpr(32))) void kernel_num_sgpr_32() {}
+kernel __attribute__((amdgpu_num_vgpr(64))) void kernel_num_vgpr_64() {}
Index: test/SemaOpenCL/amdgpu-num-register-attrs.cl
===================================================================
--- test/SemaOpenCL/amdgpu-num-register-attrs.cl
+++ test/SemaOpenCL/amdgpu-num-register-attrs.cl
@@ -1,40 +0,0 @@
-// RUN: %clang_cc1 -triple r600-- -verify -fsyntax-only %s
-
-typedef __attribute__((amdgpu_num_vgpr(128))) struct FooStruct { // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
-  int x;
-  float y;
-} FooStruct;
-
-
-__attribute__((amdgpu_num_vgpr("ABC"))) kernel void foo2() {} // expected-error {{'amdgpu_num_vgpr' attribute requires an integer constant}}
-__attribute__((amdgpu_num_sgpr("ABC"))) kernel void foo3() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}}
-
-
-__attribute__((amdgpu_num_vgpr(40))) void foo4() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
-__attribute__((amdgpu_num_sgpr(64))) void foo5() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
-
-__attribute__((amdgpu_num_vgpr(40))) kernel void foo7() {}
-__attribute__((amdgpu_num_sgpr(64))) kernel void foo8() {}
-__attribute__((amdgpu_num_vgpr(40), amdgpu_num_sgpr(64))) kernel void foo9() {}
-
-// Check 0 VGPR is accepted.
-__attribute__((amdgpu_num_vgpr(0))) kernel void foo10() {}
-
-// Check 0 SGPR is accepted.
-__attribute__((amdgpu_num_sgpr(0))) kernel void foo11() {}
-
-// Check both 0 SGPR and VGPR is accepted.
-__attribute__((amdgpu_num_vgpr(0), amdgpu_num_sgpr(0))) kernel void foo12() {}
-
-// Too large VGPR value.
-__attribute__((amdgpu_num_vgpr(4294967296))) kernel void foo13() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
-
-__attribute__((amdgpu_num_sgpr(4294967296))) kernel void foo14() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
-
-__attribute__((amdgpu_num_sgpr(4294967296), amdgpu_num_vgpr(4294967296))) kernel void foo15() {} // expected-error 2 {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
-
-
-// Make sure it is accepted with kernel keyword before the attribute.
-kernel __attribute__((amdgpu_num_vgpr(40))) void foo16() {}
-
-kernel __attribute__((amdgpu_num_sgpr(40))) void foo17() {}