Index: include/llvm/IR/GlobalAlias.h =================================================================== --- include/llvm/IR/GlobalAlias.h +++ include/llvm/IR/GlobalAlias.h @@ -82,6 +82,10 @@ static inline bool classof(const Value *V) { return V->getValueID() == Value::GlobalAliasVal; } + + // return the constant offset of an expression, with which this global var + // has alias. + uint64_t calculateOffset(const DataLayout &DL) const; }; template <> Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp =================================================================== --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -946,8 +946,11 @@ EmitVisibility(Name, Alias.getVisibility()); // Emit the directives as assignments aka .set: - OutStreamer.EmitAssignment(Name, - MCSymbolRefExpr::Create(Target, OutContext)); + const MCExpr *Expr = MCSymbolRefExpr::Create(Target, OutContext); + if (uint64_t Offset = Alias.calculateOffset(*TM.getDataLayout())) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(Offset, OutContext), OutContext); + OutStreamer.EmitAssignment(Name, Expr); } } Index: lib/IR/Globals.cpp =================================================================== --- lib/IR/Globals.cpp +++ lib/IR/Globals.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" @@ -268,3 +269,27 @@ return GV; } } + +uint64_t GlobalAlias::calculateOffset(const DataLayout &DL) const { + uint64_t Offset = 0; + const Constant *C = this; + while (C) { + if (const GlobalAlias *GA = dyn_cast(C)) { + C = GA->getAliasee(); + } else if (const ConstantExpr *CE = dyn_cast(C)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + std::vector Args; + for (unsigned I = 1; I < CE->getNumOperands(); ++I) + Args.push_back(CE->getOperand(I)); + Offset += DL.getIndexedOffset(CE->getOperand(0)->getType(), Args); + } + C = CE->getOperand(0); + } else if (isa(C)) { + return Offset; + } else { + assert(0 && "Unexpected type in alias chain!"); + return 0; + } + } + return Offset; +} Index: lib/Transforms/Scalar/GlobalMerge.cpp =================================================================== --- lib/Transforms/Scalar/GlobalMerge.cpp +++ lib/Transforms/Scalar/GlobalMerge.cpp @@ -72,7 +72,7 @@ #define DEBUG_TYPE "global-merge" static cl::opt -EnableGlobalMerge("global-merge", cl::Hidden, +EnableGlobalMerge("enable-global-merge", cl::NotHidden, cl::desc("Enable global merge pass"), cl::init(true)); @@ -81,6 +81,11 @@ cl::desc("Enable global merge pass on constants"), cl::init(false)); +static cl::opt +EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, + cl::desc("Enable global merge pass on external linkage"), + cl::init(false)); + STATISTIC(NumMerged , "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { @@ -129,9 +134,23 @@ } // end anonymous namespace char GlobalMerge::ID = 0; -INITIALIZE_PASS(GlobalMerge, "global-merge", - "Global Merge", false, false) +//INITIALIZE_PASS(GlobalMerge, "global-merge", +// "Global Merge", false, false) + +static void *initializeGlobalMergePassOnce(PassRegistry &Registry) { + PassInfo *PI = new PassInfo( + "Merge global variables", + "global-merge", &GlobalMerge::ID, + PassInfo::NormalCtor_t(callDefaultCtor), false, + false, PassInfo::TargetMachineCtor_t( + callTargetMachineCtor)); + Registry.registerPass(*PI, true); + return PI; +} +void llvm::initializeGlobalMergePass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializeGlobalMergePassOnce) +} bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Module &M, bool isConst, unsigned AddrSpace) const { @@ -154,29 +173,60 @@ Type *Int32Ty = Type::getInt32Ty(M.getContext()); + assert (Globals.size() > 1); + bool IsExternal = Globals[0]->hasExternalLinkage(); + // If merged variables doesn't have external linkage, we needn't to expose + // the symbol after merging. + GlobalValue::LinkageTypes Linkage = IsExternal ? + GlobalValue::ExternalLinkage : + GlobalValue::InternalLinkage ; + for (size_t i = 0, e = Globals.size(); i != e; ) { size_t j = 0; uint64_t MergedSize = 0; std::vector Tys; std::vector Inits; + for (j = i; j != e; ++j) { Type *Ty = Globals[j]->getType()->getElementType(); MergedSize += DL->getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { + MergedSize -= DL->getTypeAllocSize(Ty); break; } Tys.push_back(Ty); Inits.push_back(Globals[j]->getInitializer()); + + // Globals have been classified, the globals passed to this function + // must all have the same linkage. + assert(Globals[i]->hasExternalLinkage() == IsExternal); } StructType *MergedTy = StructType::get(M.getContext(), Tys); Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); + + // If merged variables have external linkage, we use symbol name of the + // first variable merged as the suffix of global symbol name. + Twine MergedGVName = IsExternal ? + "_MergedGlobals_" + Globals[i]->getName() : + "_MergedGlobals" ; GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst, - GlobalValue::InternalLinkage, - MergedInit, "_MergedGlobals", - 0, GlobalVariable::NotThreadLocal, - AddrSpace); + Linkage, MergedInit, MergedGVName, + 0, GlobalVariable::NotThreadLocal, + AddrSpace); + + if (EnableGlobalMergeOnExternal && IsExternal) { + // If the alignment is not a power of 2, round up to the next power of 2. + uint64_t Align = MergedSize; + if (Align & (Align-1)) + Align = llvm::NextPowerOf2(Align); + MergedGV->setAlignment(Align); + } + for (size_t k = i; k < j; ++k) { + GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); + std::string Name = Globals[k]->getName(); + Constant *Idx[2] = { ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, k-i) @@ -184,6 +234,12 @@ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); Globals[k]->replaceAllUsesWith(GEP); Globals[k]->eraseFromParent(); + + if (Linkage != GlobalValue::InternalLinkage) { + // Generate a new alias... + new GlobalAlias(GEP->getType(), Linkage, Name, GEP, &M); + } + NumMerged++; } i = j; @@ -244,8 +300,12 @@ // Grab all non-const globals. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - // Merge is safe for "normal" internal globals only - if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection()) + // Merge is safe for "normal" internal or external globals only + if (I->isDeclaration() || I->isThreadLocal() || I->hasSection()) + continue; + + if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) + && !I->hasInternalLinkage()) continue; PointerType *PT = dyn_cast(I->getType()); Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -38,6 +38,7 @@ initializeDSEPass(Registry); initializeGVNPass(Registry); initializeEarlyCSEPass(Registry); + initializeGlobalMergePass(Registry); initializeIndVarSimplifyPass(Registry); initializeJumpThreadingPass(Registry); initializeLICMPass(Registry); Index: test/CodeGen/AArch64/global-merge.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/global-merge.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck --check-prefix=NO-MERGE %s +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 -global-merge-on-external=true | FileCheck --check-prefix=NO-MERGE %s + +; RUN: llc < %s -mtriple=arm64-apple-ios -O0 | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE +; RUN: llc < %s -mtriple=arm64-apple-ios -O0 -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE + +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O1 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O1 -global-merge-on-external=true | FileCheck %s + +; RUN: llc < %s -mtriple=arm64-apple-ios -O1 | FileCheck %s --check-prefix=CHECK-APPLE-IOS +; RUN: llc < %s -mtriple=arm64-apple-ios -O1 -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-APPLE-IOS + +@m = internal global i32 0, align 4 +@n = internal global i32 0, align 4 + +define void @f1(i32 %a1, i32 %a2) { +; CHECK-LABEL: f1: +; CHECK: adrp x{{[0-9]+}}, _MergedGlobals +; CHECK-NOT: adrp + +; CHECK-APPLE-IOS-LABEL: f1: +; CHECK-APPLE-IOS: adrp x{{[0-9]+}}, __MergedGlobals +; CHECK-APPLE-IOS-NOT: adrp + store i32 %a1, i32* @m, align 4 + store i32 %a2, i32* @n, align 4 + ret void +} + +; CHECK: .local _MergedGlobals +; CHECK: .comm _MergedGlobals,8,8 +; NO-MERGE-NOT: .local _MergedGlobals + +; CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3 +; CHECK-APPLE-IOS-NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,8,3 Index: test/CodeGen/AArch64/global_merge_1.ll =================================================================== --- test/CodeGen/AArch64/global_merge_1.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s - -@m = internal global i32 0, align 4 -@n = internal global i32 0, align 4 - -define void @f1(i32 %a1, i32 %a2) { -; CHECK-LABEL: f1: -; CHECK: adrp x{{[0-9]+}}, _MergedGlobals -; CHECK-NOT: adrp - store i32 %a1, i32* @m, align 4 - store i32 %a2, i32* @n, align 4 - ret void -} - -; CHECK: .local _MergedGlobals -; CHECK: .comm _MergedGlobals,8,8 - Index: test/CodeGen/ARM/global-merge-1.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/global-merge-1.ll @@ -0,0 +1,85 @@ +; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s +; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s + +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 +; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 + +; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2 +; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios3.0.0" + +@bar = internal global [5 x i32] zeroinitializer, align 4 +@baz = internal global [5 x i32] zeroinitializer, align 4 +@foo = internal global [5 x i32] zeroinitializer, align 4 + +; Function Attrs: nounwind ssp +define internal void @initialize() #0 { + %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 0), align 4, !tbaa !1 + %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 0), align 4, !tbaa !1 + %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 1), align 4, !tbaa !1 + %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 1), align 4, !tbaa !1 + %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 2), align 4, !tbaa !1 + %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 2), align 4, !tbaa !1 + %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 3), align 4, !tbaa !1 + %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 3), align 4, !tbaa !1 + %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1 + %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 + store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1 + ret void +} + +declare i32 @calc(...) #1 + +; Function Attrs: nounwind ssp +define internal void @calculate() #0 { + %1 = load <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4 + %2 = load <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4 + %3 = mul <4 x i32> %2, %1 + store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4 + %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1 + %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1 + %6 = mul nsw i32 %5, %4 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1 + ret void +} + +; Function Attrs: nounwind readnone ssp +define internal i32* @returnFoo() #2 { + ret i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 0) +} + +attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"LLVM version 3.4 "} +!1 = metadata !{metadata !2, metadata !2, i64 0} +!2 = metadata !{metadata !"int", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} Index: test/CodeGen/ARM64/global-merge.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM64/global-merge.ll @@ -0,0 +1,88 @@ +; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s +; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s +; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s + +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 +; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 +; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 + +; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2 +; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2 +; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" +target triple = "arm64-apple-ios7.0.0" + +@bar = internal global [5 x i32] zeroinitializer, align 4 +@baz = internal global [5 x i32] zeroinitializer, align 4 +@foo = internal global [5 x i32] zeroinitializer, align 4 + +; Function Attrs: nounwind ssp +define internal void @initialize() #0 { + %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4 + %2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4 + %3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4 + %4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4 + %5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4 + %6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4 + %7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4 + %8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4 + %9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4 + %10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2 + store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4 + ret void +} + +declare i32 @calc(...) + +; Function Attrs: nounwind ssp +define internal void @calculate() #0 { + %1 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4 + %2 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4 + %3 = mul nsw i32 %2, %1 + store i32 %3, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0), align 4 + %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4 + %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4 + %6 = mul nsw i32 %5, %4 + store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 1), align 4 + %7 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4 + %8 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4 + %9 = mul nsw i32 %8, %7 + store i32 %9, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 2), align 4 + %10 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4 + %11 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4 + %12 = mul nsw i32 %11, %10 + store i32 %12, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 3), align 4 + %13 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4 + %14 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4 + %15 = mul nsw i32 %14, %13 + store i32 %15, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 4), align 4 + ret void +} + +; Function Attrs: nounwind readnone ssp +define internal i32* @returnFoo() #1 { + ret i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0) +} + +attributes #0 = { nounwind ssp } +attributes #1 = { nounwind readnone ssp } +attributes #2 = { nounwind } Index: test/Transforms/GlobalMerge/AArch64/global-merge-1.ll =================================================================== --- /dev/null +++ test/Transforms/GlobalMerge/AArch64/global-merge-1.ll @@ -0,0 +1,22 @@ +; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -S -o - | FileCheck %s +; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -S -o - | FileCheck %s + +; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -S -o - | FileCheck %s +; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -global-merge-on-external -S -o - | FileCheck %s + +; RUN: opt %s -mtriple=arm64-apple-ios -global-merge -S -o - | FileCheck %s +; RUN: opt %s -mtriple=arm64-apple-ios -global-merge -global-merge-on-external -S -o - | FileCheck %s + +@m = internal global i32 0, align 4 +@n = internal global i32 0, align 4 + +; CHECK: @_MergedGlobals = internal global { i32, i32 } zeroinitializer + +define void @f1(i32 %a1, i32 %a2) { +; CHECK-LABEL: @f1 +; CHECK: getelementptr inbounds ({ i32, i32 }* @_MergedGlobals, i32 0, i32 0) +; CHECK: getelementptr inbounds ({ i32, i32 }* @_MergedGlobals, i32 0, i32 1) + store i32 %a1, i32* @m, align 4 + store i32 %a2, i32* @n, align 4 + ret void +} Index: test/Transforms/GlobalMerge/AArch64/global-merge-2.ll =================================================================== --- /dev/null +++ test/Transforms/GlobalMerge/AArch64/global-merge-2.ll @@ -0,0 +1,30 @@ +; RUN: opt %s -mtriple=aarch64-none-linux-gnu -global-merge -global-merge-on-external -S -o - | FileCheck %s +; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -global-merge-on-external -S -o - | FileCheck %s +; RUN: opt %s -mtriple=arm64-apple-ios -global-merge -global-merge-on-external -S -o - | FileCheck %s + +@x = global i32 0, align 4 +@y = global i32 0, align 4 +@z = global i32 0, align 4 + +; CHECK: @_MergedGlobals_x = global { i32, i32, i32 } zeroinitializer, align 16 +; CHECK: @x = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 0) +; CHECK: @y = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1) +; CHECK: @z = alias getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 2) + +define void @f1(i32 %a1, i32 %a2) { +; CHECK-LABEL: @f1 +; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 0) +; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1) + store i32 %a1, i32* @x, align 4 + store i32 %a2, i32* @y, align 4 + ret void +} + +define void @g1(i32 %a1, i32 %a2) { +; CHECK-LABEL: @g1 +; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 1) +; CHECK: getelementptr inbounds ({ i32, i32, i32 }* @_MergedGlobals_x, i32 0, i32 2) + store i32 %a1, i32* @y, align 4 + store i32 %a2, i32* @z, align 4 + ret void +} Index: test/Transforms/GlobalMerge/AArch64/lit.local.cfg =================================================================== --- /dev/null +++ test/Transforms/GlobalMerge/AArch64/lit.local.cfg @@ -0,0 +1,4 @@ +targets = set(config.root.targets_to_build.split()) +if not 'AArch64' in targets: + config.unsupported = True + Index: test/Transforms/GlobalMerge/ARM/arm.ll =================================================================== --- test/Transforms/GlobalMerge/ARM/arm.ll +++ test/Transforms/GlobalMerge/ARM/arm.ll @@ -1,23 +1,4 @@ -; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s -; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s - -; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 -; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4 -; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 - -; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 -; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2 -; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2 -; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2 -; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; RUN: opt %s -mtriple=arm-linux-gnuabi -global-merge -S -o - | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios3.0.0" @@ -26,6 +7,8 @@ @baz = internal global [5 x i32] zeroinitializer, align 4 @foo = internal global [5 x i32] zeroinitializer, align 4 +; CHECK: @_MergedGlobals = internal global { [5 x i32], [5 x i32], [5 x i32] } zeroinitializer + ; Function Attrs: nounwind ssp define internal void @initialize() #0 { %1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3 Index: test/Transforms/GlobalMerge/ARM64/arm64.ll =================================================================== --- test/Transforms/GlobalMerge/ARM64/arm64.ll +++ test/Transforms/GlobalMerge/ARM64/arm64.ll @@ -1,23 +1,6 @@ -; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O0 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O0 -o - -global-merge=true | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s -; RUN: llc %s -O1 -o - -global-merge=false | FileCheck -check-prefix=NO-MERGE %s -; RUN: llc %s -O1 -o - -global-merge=true | FileCheck -check-prefix=MERGE %s +; RUN: opt %s -mtriple=arm64-linux-gnuabi -global-merge -S -o - | FileCheck %s -; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 -; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4 -; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2 -; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2 - -; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 -; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2 -; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2 -; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2 -; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4 +; CHECK: @_MergedGlobals = internal global { [5 x i32], [5 x i32], [5 x i32] } zeroinitializer target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" target triple = "arm64-apple-ios7.0.0"