Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -103,6 +103,8 @@
 
   void EmitFunctionEntryLabel() override;
 
+  void EmitGlobalVariable(const GlobalVariable *GV) override;
+
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                        unsigned AsmVariant, const char *ExtraCode,
                        raw_ostream &O) override;
Index: lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -123,6 +123,30 @@
   AsmPrinter::EmitFunctionEntryLabel();
 }
 
+void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+  // Non-AMDHSA targets and other linkages fall back to the generic emission.
+  const Module *M = GV->getParent();
+  if (Triple(M->getTargetTriple()).getOS() != Triple::AMDHSA ||
+      (!GV->hasCommonLinkage() && !GV->hasInternalLinkage())) {
+    AsmPrinter::EmitGlobalVariable(GV);
+    return;
+  }
+  // Tag the symbol as an HSA module global via the target streamer.
+  AMDGPUTargetStreamer *TS =
+      static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+  TS->EmitAMDGPUHsaModuleGlobal(GV->getName());
+  // Emit "label: initializer" in GV's section, then restore the old section.
+  const DataLayout &DL = getDataLayout();
+  OutStreamer->PushSection();
+  OutStreamer->SwitchSection(
+      getObjFileLowering().SectionForGlobal(GV, *Mang, TM));
+  MCSymbol *GVSym = getSymbol(GV);
+  const Constant *C = GV->getInitializer();
+  OutStreamer->EmitLabel(GVSym); // NOTE(review): no alignment emitted first.
+  EmitGlobalConstant(DL, C);
+  OutStreamer->PopSection();
+}
+
 bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 
   // The starting address of all shader programs must be 256 bytes aligned.
Index: lib/Target/AMDGPU/AMDGPUHSATargetObjectFile.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUHSATargetObjectFile.h
+++ lib/Target/AMDGPU/AMDGPUHSATargetObjectFile.h
@@ -22,6 +22,9 @@
 namespace llvm {
 
 class AMDGPUHSATargetObjectFile final : public TargetLoweringObjectFileELF {
+private:
+  MCSection *DataGlobalAgentSection;
+
 public:
   void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
 
Index: lib/Target/AMDGPU/AMDGPUHSATargetObjectFile.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUHSATargetObjectFile.cpp
+++ lib/Target/AMDGPU/AMDGPUHSATargetObjectFile.cpp
@@ -22,6 +22,13 @@
 
   TextSection = AMDGPU::getHSATextSection(Ctx);
 
+  DataGlobalAgentSection = AMDGPU::getHSADataGlobalAgentSection(Ctx);
+}
+
+
+// Returns true if GV has common or internal linkage.
+static bool isModuleLinkage(const GlobalValue *GV) {
+  return GV->hasCommonLinkage() || GV->hasInternalLinkage();
 }
 
 MCSection *AMDGPUHSATargetObjectFile::SelectSectionForGlobal(
@@ -31,5 +38,9 @@
   if (Kind.isText() && !GV->hasComdat())
     return getTextSection();
 
+  // Common- and internal-linkage globals go in .hsadata_global_agent.
+  if (isModuleLinkage(GV))
+    return DataGlobalAgentSection;
+
   return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM);
 }
Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -358,6 +358,8 @@
   bool ParseSectionDirectiveHSAText();
   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
   bool ParseDirectiveAMDGPUHsaKernel();
+  bool ParseDirectiveAMDGPUHsaModuleGlobal();
+  bool ParseSectionDirectiveHSADataGlobalAgent();
 
 public:
 public:
@@ -957,6 +959,23 @@
   return false;
 }
 
+bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() {
+  if (getLexer().isNot(AsmToken::Identifier))
+    return TokError("expected symbol name");
+  // Operand of .amdgpu_hsa_module_global: the name of the global symbol.
+  StringRef GlobalName = Parser.getTok().getIdentifier();
+  // Forward the name to the target streamer, then consume the identifier.
+  getTargetStreamer().EmitAMDGPUHsaModuleGlobal(GlobalName);
+  Lex();
+  return false;
+}
+
+bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() {
+  MCSection *Section = AMDGPU::getHSADataGlobalAgentSection(getContext());
+  getParser().getStreamer().SwitchSection(Section); // Make it current.
+  return false;
+}
+
 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
   StringRef IDVal = DirectiveID.getString();
 
@@ -975,6 +994,12 @@
   if (IDVal == ".amdgpu_hsa_kernel")
     return ParseDirectiveAMDGPUHsaKernel();
 
+  if (IDVal == ".amdgpu_hsa_module_global")
+    return ParseDirectiveAMDGPUHsaModuleGlobal();
+
+  if (IDVal == ".hsadata_global_agent")
+    return ParseSectionDirectiveHSADataGlobalAgent();
+
   return true;
 }
 
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -36,6 +36,6 @@
 }
 
 bool AMDGPUMCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const {
-  return SectionName == ".hsatext" ||
+  return SectionName == ".hsatext" || SectionName == ".hsadata_global_agent" ||
          MCAsmInfo::shouldOmitSectionDirective(SectionName);
 }
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -32,6 +32,8 @@
   virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0;
 
   virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;
+
+  virtual void EmitAMDGPUHsaModuleGlobal(StringRef GlobalName) = 0;
 };
 
 class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
@@ -48,6 +50,8 @@
   void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
 
   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
+
+  void EmitAMDGPUHsaModuleGlobal(StringRef GlobalName) override;
 };
 
 class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
@@ -80,6 +84,8 @@
   void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
 
   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
+
+  void EmitAMDGPUHsaModuleGlobal(StringRef GlobalName) override;
 };
 
 }
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -231,6 +231,10 @@
   }
 }
 
+void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleGlobal(StringRef GlobalName) {
+  OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n';
+}
+
 //===----------------------------------------------------------------------===//
 // AMDGPUTargetELFStreamer
 //===----------------------------------------------------------------------===//
@@ -316,3 +320,10 @@
       getStreamer().getContext().getOrCreateSymbol(SymbolName));
   Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
 }
+
+void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleGlobal(StringRef GlobalName) {
+  // Look up (or create) the named symbol and set its ELF type to STT_OBJECT.
+  MCSymbolELF *Symbol = cast<MCSymbolELF>(
+      getStreamer().getContext().getOrCreateSymbol(GlobalName));
+  Symbol->setType(ELF::STT_OBJECT);
+}
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -31,6 +31,8 @@
                                const FeatureBitset &Features);
 MCSection *getHSATextSection(MCContext &Ctx);
 
+MCSection *getHSADataGlobalAgentSection(MCContext &Ctx);
+
 } // end namespace AMDGPU
 } // end namespace llvm
 
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -66,5 +66,12 @@
                            ELF::SHF_AMDGPU_HSA_CODE);
 }
 
+MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
+  // Allocated, writable PROGBITS section tagged HSA_GLOBAL and HSA_AGENT.
+  const unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE |
+                         ELF::SHF_AMDGPU_HSA_GLOBAL | ELF::SHF_AMDGPU_HSA_AGENT;
+  return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS, Flags);
+}
+
 } // End namespace AMDGPU
 } // End namespace llvm
Index: test/CodeGen/AMDGPU/hsa.ll
===================================================================
--- test/CodeGen/AMDGPU/hsa.ll
+++ test/CodeGen/AMDGPU/hsa.ll
@@ -25,6 +25,20 @@
 ; ELF: 0040: 50550000
 
 ; ELF: Symbol {
+; ELF: Name: __wg_scratch
+; ELF: Binding: Local (0x0)
+; ELF: Type: Object (0x1)
+; ELF: Section: .hsadata_global_agent
+; ELF: }
+
+; ELF: Symbol {
+; ELF: Name: global_agent
+; ELF: Binding: Local (0x0)
+; ELF: Type: Object (0x1)
+; ELF: Section: .hsadata_global_agent
+; ELF: }
+
+; ELF: Symbol {
 ; ELF: Name: simple
 ; ELF: Type: AMDGPU_HSA_KERNEL (0xA)
 ; ELF: }
@@ -46,6 +60,17 @@
 ; On VI+ we also need to set MTYPE = 2
 ; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000
 ; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
+; HSA: .amdgpu_hsa_module_global __wg_scratch
+; HSA: .hsadata_global_agent
+; HSA: __wg_scratch:
+; HSA: .long 0
+; HSA: .amdgpu_hsa_module_global global_agent
+; HSA: .hsadata_global_agent
+; HSA: global_agent:
+; HSA: .zero 64
+
+@__wg_scratch = common addrspace(3) global i32 zeroinitializer, align 4
+@global_agent = internal unnamed_addr addrspace(3) global [8 x i64] undef, align 16
 
 define void @simple(i32 addrspace(1)* %out) {
 entry:
Index: test/MC/AMDGPU/hsa-globals.s
===================================================================
--- /dev/null
+++ test/MC/AMDGPU/hsa-globals.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM
+// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF
+
+
+; ELF: Symbol {
+; ELF: Name: __wg_scratch
+; ELF: Binding: Local (0x0)
+; ELF: Type: Object (0x1)
+; ELF: Section: .hsadata_global_agent
+; ELF: }
+
+; ASM: .amdgpu_hsa_module_global __wg_scratch
+; ASM: .hsadata_global_agent
+; ASM: __wg_scratch:
+; ASM: .long 0
+
+.amdgpu_hsa_module_global __wg_scratch
+.hsadata_global_agent
+__wg_scratch:
+.long   0
+