Index: include/llvm/IR/GlobalAlias.h
===================================================================
--- include/llvm/IR/GlobalAlias.h
+++ include/llvm/IR/GlobalAlias.h
@@ -82,6 +82,10 @@
   static inline bool classof(const Value *V) {
     return V->getValueID() == Value::GlobalAliasVal;
   }
+
+  /// Returns the constant byte offset that the GEP constant expressions in
+  /// this alias's aliasee chain add on top of the underlying global.
+  uint64_t calculateOffset(const DataLayout &DL) const;
 };
 
 template <>
Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -945,8 +945,11 @@
       EmitVisibility(Name, Alias.getVisibility());
 
       // Emit the directives as assignments aka .set:
-      OutStreamer.EmitAssignment(Name,
-                                 MCSymbolRefExpr::Create(Target, OutContext));
+      const MCExpr *Expr = MCSymbolRefExpr::Create(Target, OutContext);
+      if (uint64_t Offset = Alias.calculateOffset(*TM.getDataLayout()))
+        Expr = MCBinaryExpr::CreateAdd(Expr,
+                 MCConstantExpr::Create(Offset, OutContext), OutContext);
+      OutStreamer.EmitAssignment(Name, Expr);
     }
   }
 
Index: lib/IR/Globals.cpp
===================================================================
--- lib/IR/Globals.cpp
+++ lib/IR/Globals.cpp
@@ -15,6 +15,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -268,3 +269,27 @@
       return GV;
   }
 }
+
+// Sums the byte offsets of all GEP constant expressions on the aliasee chain.
+uint64_t GlobalAlias::calculateOffset(const DataLayout &DL) const {
+  uint64_t Total = 0;
+  for (const Constant *Cur = this; Cur;) {
+    if (const GlobalAlias *Alias = dyn_cast<GlobalAlias>(Cur)) {
+      Cur = Alias->getAliasee(); // An alias simply forwards to its aliasee.
+      continue;
+    }
+    if (isa<GlobalValue>(Cur))
+      return Total; // Reached the underlying non-alias global.
+    const ConstantExpr *Expr = dyn_cast<ConstantExpr>(Cur);
+    if (!Expr) {
+      assert(0 && "Unexpected type in alias chain!");
+      return 0;
+    }
+    if (Expr->getOpcode() == Instruction::GetElementPtr) { // Only GEPs count.
+      std::vector<Value*> Indices(Expr->op_begin() + 1, Expr->op_end());
+      Total += DL.getIndexedOffset(Expr->getOperand(0)->getType(), Indices);
+    }
+    Cur = Expr->getOperand(0); // Look through to the expression's base.
+  }
+  return Total;
+}
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -376,6 +376,10 @@
   virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                                   unsigned Intrinsic) const override;
 
+  /// Returns the maximal possible offset which can be used for loads / stores
+  /// from a global. Overrides the base-class hook.
+  unsigned getMaximalGlobalOffset() const override;
+
 protected:
   std::pair<const TargetRegisterClass*, uint8_t>
   findRepresentativeClass(MVT VT) const;
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5537,3 +5537,10 @@
     return AM.Scale != 0 && AM.Scale != 1;
   return -1;
 }
+
+/// Returns the largest offset usable for loads / stores from a global.
+unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
+  const unsigned MaxOffset = 4095;
+  return MaxOffset;
+}
+
Index: lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetMachine.cpp
+++ lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/PassManager.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
 
 using namespace llvm;
 
@@ -86,11 +87,19 @@
     return *getAArch64TargetMachine().getSubtargetImpl();
   }
 
+  bool addPreISel() override;
   virtual bool addInstSelector();
   virtual bool addPreEmitPass();
 };
 } // namespace
 
+bool AArch64PassConfig::addPreISel() {
+  const bool Optimizing = TM->getOptLevel() != CodeGenOpt::None;
+  if (Optimizing) // GlobalMerge is skipped when optimization is disabled.
+    addPass(createGlobalMergePass(TM));
+  return false;
+}
+
 TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
   return new AArch64PassConfig(this, PM);
 }
Index: lib/Transforms/Scalar/GlobalMerge.cpp
===================================================================
--- lib/Transforms/Scalar/GlobalMerge.cpp
+++ lib/Transforms/Scalar/GlobalMerge.cpp
@@ -80,6 +80,11 @@
                          cl::desc("Enable global merge pass on constants"),
                          cl::init(false));
 
+static cl::opt<bool>
+EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
+                   cl::desc("Enable global merge pass on external linkage"),
+                   cl::init(false));
+
 STATISTIC(NumMerged      , "Number of globals merged");
 namespace {
   class GlobalMerge : public FunctionPass {
@@ -158,24 +163,53 @@
     uint64_t MergedSize = 0;
     std::vector<Type*> Tys;
     std::vector<Constant*> Inits;
+    const GlobalVariable *FirstExternal = 0; // First external member, if any.
     for (j = i; j != e; ++j) {
       Type *Ty = Globals[j]->getType()->getElementType();
       MergedSize += DL->getTypeAllocSize(Ty);
       if (MergedSize > MaxOffset) {
+        MergedSize -= DL->getTypeAllocSize(Ty); // Undo: Ty is not merged.
         break;
       }
       Tys.push_back(Ty);
       Inits.push_back(Globals[j]->getInitializer());
+
+      // Check the member just added (index j), not only the first one.
+      if (!FirstExternal && Globals[j]->hasExternalLinkage())
+        FirstExternal = Globals[j];
     }
 
     StructType *MergedTy = StructType::get(M.getContext(), Tys);
     Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+
+    // If no merged variable has external linkage, the merged symbol does not
+    // need to be exposed either.
+    GlobalValue::LinkageTypes Linkage = FirstExternal ?
+                                          GlobalValue::ExternalLinkage :
+                                          GlobalValue::InternalLinkage;
+
+    // An exposed symbol is suffixed with the first external member's name.
+    // Built as std::string: a Twine local would dangle on its temporaries.
+    std::string MergedGVName = "_MergedGlobals";
+    if (FirstExternal)
+      MergedGVName += ("_" + FirstExternal->getName()).str();
     GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
-                                                  GlobalValue::InternalLinkage,
-                                                  MergedInit, "_MergedGlobals",
-                                                  0, GlobalVariable::NotThreadLocal,
-                                                  AddrSpace);
+                                     Linkage, MergedInit, MergedGVName,
+                                     0, GlobalVariable::NotThreadLocal,
+                                     AddrSpace);
+
+    if (EnableGlobalMergeOnExternal) {
+      // If the alignment is not a power of 2, round up to the next power of 2.
+      uint64_t Align = MergedSize;
+      if (Align & (Align-1))
+        Align = llvm::NextPowerOf2(Align);
+      MergedGV->setAlignment(Align);
+    }
+
     for (size_t k = i; k < j; ++k) {
+      GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
+      std::string Name = Globals[k]->getName();
+
       Constant *Idx[2] = {
         ConstantInt::get(Int32Ty, 0),
         ConstantInt::get(Int32Ty, k-i)
@@ -183,6 +217,12 @@
       Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
       Globals[k]->replaceAllUsesWith(GEP);
       Globals[k]->eraseFromParent();
+
+      if (Linkage != GlobalValue::InternalLinkage) {
+        // Generate a new alias...
+        new GlobalAlias(GEP->getType(), Linkage, Name, GEP, &M);
+      }
+
       NumMerged++;
     }
     i = j;
@@ -243,8 +283,10 @@
   // Grab all non-const globals.
   for (Module::global_iterator I = M.global_begin(),
          E = M.global_end(); I != E; ++I) {
-    // Merge is safe for "normal" internal globals only
-    if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+    // Merge is safe for "normal" internal or external globals only
+    if (!((EnableGlobalMergeOnExternal && I->hasExternalLinkage())
+          || I->hasInternalLinkage())
+        || I->isDeclaration() || I->isThreadLocal() || I->hasSection())
       continue;
 
     PointerType *PT = dyn_cast<PointerType>(I->getType());
Index: test/CodeGen/AArch64/global_merge.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/global_merge.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -global-merge-on-external=true | FileCheck %s
+
+@x = global i32 0, align 4
+@y = global i32 0, align 4
+@z = global i32 0, align 4
+@m = internal global i32 0, align 4
+@n = internal global i32 0, align 4
+
+define void @f1(i32 %a1, i32 %a2) {
+; CHECK-LABEL: f1:
+; CHECK: adrp x{{[0-9]+}}, _MergedGlobals_
+  store i32 %a1, i32* @x, align 4
+  store i32 %a2, i32* @y, align 4
+; @m and @n are internal, so they are expected in the unsuffixed symbol.
+; CHECK: adrp x{{[0-9]+}}, _MergedGlobals
+; CHECK-NOT: adrp
+  store i32 %a1, i32* @m, align 4
+  store i32 %a2, i32* @n, align 4
+  ret void
+}
+
+define void @g1(i32 %a1, i32 %a2) {
+; CHECK-LABEL: g1:
+; CHECK: adrp
+
+; @y and @z are expected in the same merged symbol, so one adrp must suffice.
+; CHECK-NOT: adrp
+  store i32 %a1, i32* @y, align 4
+  store i32 %a2, i32* @z, align 4
+  ret void
+}
+
+; CHECK:        .bss
+; CHECK:        .globl	_MergedGlobals_x
+; CHECK:        .align	4
+; CHECK: _MergedGlobals_
+; CHECK:        .size	_MergedGlobals_x, 12
+
+; CHECK:        .local	_MergedGlobals
+; CHECK:        .comm	_MergedGlobals,8,8
+
+; CHECK:        .globl  x
+; CHECK: x = _MergedGlobals_x
+; CHECK:        .globl  y
+; CHECK: y = _MergedGlobals_x+4
+; CHECK:        .globl  z
+; CHECK: z = _MergedGlobals_x+8
+