Index: llvm/trunk/lib/CodeGen/GlobalMerge.cpp =================================================================== --- llvm/trunk/lib/CodeGen/GlobalMerge.cpp +++ llvm/trunk/lib/CodeGen/GlobalMerge.cpp @@ -440,6 +440,7 @@ assert(Globals.size() > 1); Type *Int32Ty = Type::getInt32Ty(M.getContext()); + Type *Int8Ty = Type::getInt8Ty(M.getContext()); auto &DL = M.getDataLayout(); LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #" @@ -452,17 +453,31 @@ uint64_t MergedSize = 0; std::vector Tys; std::vector Inits; + std::vector StructIdxs; bool HasExternal = false; StringRef FirstExternalName; + unsigned MaxAlign = 1; + unsigned CurIdx = 0; for (j = i; j != -1; j = GlobalSet.find_next(j)) { Type *Ty = Globals[j]->getValueType(); + unsigned Align = DL.getPreferredAlignment(Globals[j]); + unsigned Padding = alignTo(MergedSize, Align) - MergedSize; + MergedSize += Padding; MergedSize += DL.getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { break; } + if (Padding) { + Tys.push_back(ArrayType::get(Int8Ty, Padding)); + Inits.push_back(ConstantAggregateZero::get(Tys.back())); + ++CurIdx; + } Tys.push_back(Ty); Inits.push_back(Globals[j]->getInitializer()); + StructIdxs.push_back(CurIdx++); + + MaxAlign = std::max(MaxAlign, Align); if (Globals[j]->hasExternalLinkage() && !HasExternal) { HasExternal = true; @@ -481,7 +496,8 @@ GlobalValue::LinkageTypes Linkage = HasExternal ? GlobalValue::ExternalLinkage : GlobalValue::InternalLinkage; - StructType *MergedTy = StructType::get(M.getContext(), Tys); + // Use a packed struct so we can control alignment. + StructType *MergedTy = StructType::get(M.getContext(), Tys, true); Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); // On Darwin external linkage needs to be preserved, otherwise @@ -499,13 +515,9 @@ M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr, GlobalVariable::NotThreadLocal, AddrSpace); - const StructLayout *MergedLayout = DL.getStructLayout(MergedTy); - // Set the alignment of the merged struct as the maximum alignment of the - // globals to prevent over-alignment. We don't handle globals that are not - // default aligned, so the alignment of the MergedLayout struct is - // equivalent. - MergedGV->setAlignment(MergedLayout->getAlignment()); + MergedGV->setAlignment(MaxAlign); + const StructLayout *MergedLayout = DL.getStructLayout(MergedTy); for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) { GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); std::string Name = Globals[k]->getName(); @@ -514,11 +526,12 @@ // Copy metadata while adjusting any debug info metadata by the original // global's offset within the merged global. - MergedGV->copyMetadata(Globals[k], MergedLayout->getElementOffset(idx)); + MergedGV->copyMetadata(Globals[k], + MergedLayout->getElementOffset(StructIdxs[idx])); Constant *Idx[2] = { - ConstantInt::get(Int32Ty, 0), - ConstantInt::get(Int32Ty, idx), + ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, StructIdxs[idx]), }; Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx); @@ -531,8 +544,8 @@ // It's not safe on Mach-O as the alias (and thus the portion of the // MergedGlobals variable) may be dead stripped at link time. if (Linkage != GlobalValue::InternalLinkage || !IsMachO) { - GlobalAlias *GA = - GlobalAlias::create(Tys[idx], AddrSpace, Linkage, Name, GEP, &M); + GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace, + Linkage, Name, GEP, &M); GA->setDLLStorageClass(DLLStorage); } @@ -610,12 +623,6 @@ unsigned AddressSpace = PT->getAddressSpace(); - // Ignore fancy-aligned globals for now. - unsigned Alignment = DL.getPreferredAlignment(&GV); - Type *Ty = GV.getValueType(); - if (Alignment > DL.getABITypeAlignment(Ty)) - continue; - // Ignore all 'special' globals. if (GV.getName().startswith("llvm.") || GV.getName().startswith(".llvm.")) @@ -625,6 +632,7 @@ if (isMustKeepGlobalVariable(&GV)) continue; + Type *Ty = GV.getValueType(); if (DL.getTypeAllocSize(Ty) < MaxOffset) { if (TM && TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal()) Index: llvm/trunk/test/Transforms/GlobalMerge/alignment-2.ll =================================================================== --- llvm/trunk/test/Transforms/GlobalMerge/alignment-2.ll +++ llvm/trunk/test/Transforms/GlobalMerge/alignment-2.ll @@ -0,0 +1,22 @@ +; RUN: opt -global-merge -global-merge-max-offset=100 -S -o - %s | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +; This produces align 4, not the obvious align 1, to be consistent with what +; the AsmPrinter would do. +; CHECK: @_MergedGlobals = private global <{ [2 x i32], [2 x i32] }> <{ [2 x i32] [i32 1, i32 1], [2 x i32] [i32 2, i32 2] }>, align 4 + +; CHECK: @a = internal alias [2 x i32], getelementptr inbounds (<{ [2 x i32], [2 x i32] }>, <{ [2 x i32], [2 x i32] }>* @_MergedGlobals, i32 0, i32 0) +@a = internal global [2 x i32] [i32 1, i32 1], align 1 + +; CHECK: @b = internal alias [2 x i32], getelementptr inbounds (<{ [2 x i32], [2 x i32] }>, <{ [2 x i32], [2 x i32] }>* @_MergedGlobals, i32 0, i32 1) +@b = internal global [2 x i32] [i32 2, i32 2], align 1 + +define void @use() { + ; CHECK: load i32, i32* getelementptr inbounds (<{ [2 x i32], [2 x i32] }>, <{ [2 x i32], [2 x i32] }>* @_MergedGlobals, i32 0, i32 0, i32 0) + %x = load i32, i32* bitcast ([2 x i32]* @a to i32*) + ; CHECK: load i32, i32* getelementptr inbounds (<{ [2 x i32], [2 x i32] }>, <{ [2 x i32], [2 x i32] }>* @_MergedGlobals, i32 0, i32 1, i32 0) + %y = load i32, i32* bitcast ([2 x i32]* @b to i32*) + ret void +} Index: llvm/trunk/test/Transforms/GlobalMerge/alignment.ll =================================================================== --- llvm/trunk/test/Transforms/GlobalMerge/alignment.ll +++ llvm/trunk/test/Transforms/GlobalMerge/alignment.ll @@ -0,0 +1,20 @@ +; RUN: opt -global-merge -global-merge-max-offset=100 -S -o - %s | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: @_MergedGlobals = private global <{ [5 x i8], [3 x i8], [2 x i32] }> <{ [5 x i8] c"\01\01\01\01\01", [3 x i8] zeroinitializer, [2 x i32] [i32 2, i32 2] }>, align 4 + +; CHECK: @a = internal alias [5 x i8], getelementptr inbounds (<{ [5 x i8], [3 x i8], [2 x i32] }>, <{ [5 x i8], [3 x i8], [2 x i32] }>* @_MergedGlobals, i32 0, i32 0) +@a = internal global [5 x i8] [i8 1, i8 1, i8 1, i8 1, i8 1], align 4 + +; CHECK: @b = internal alias [2 x i32], getelementptr inbounds (<{ [5 x i8], [3 x i8], [2 x i32] }>, <{ [5 x i8], [3 x i8], [2 x i32] }>* @_MergedGlobals, i32 0, i32 2) +@b = internal global [2 x i32] [i32 2, i32 2] + +define void @use() { + ; CHECK: load i32, i32* bitcast (<{ [5 x i8], [3 x i8], [2 x i32] }>* @_MergedGlobals to i32*) + %x = load i32, i32* bitcast ([5 x i8]* @a to i32*) + ; CHECK: load i32, i32* getelementptr inbounds (<{ [5 x i8], [3 x i8], [2 x i32] }>, <{ [5 x i8], [3 x i8], [2 x i32] }>* @_MergedGlobals, i32 0, i32 2, i32 0) + %y = load i32, i32* bitcast ([2 x i32]* @b to i32*) + ret void +} Index: llvm/trunk/test/Transforms/GlobalMerge/basic.ll =================================================================== --- llvm/trunk/test/Transforms/GlobalMerge/basic.ll +++ llvm/trunk/test/Transforms/GlobalMerge/basic.ll @@ -3,18 +3,18 @@ target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" -; CHECK: @_MergedGlobals = private global { i32, i32 } { i32 1, i32 2 } +; CHECK: @_MergedGlobals = private global <{ i32, i32 }> <{ i32 1, i32 2 }>, align 4 -; CHECK: @a = internal alias i32, getelementptr inbounds ({ i32, i32 }, { i32, i32 }* @_MergedGlobals, i32 0, i32 0) +; CHECK: @a = internal alias i32, getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @_MergedGlobals, i32 0, i32 0) @a = internal global i32 1 -; CHECK: @b = internal alias i32, getelementptr inbounds ({ i32, i32 }, { i32, i32 }* @_MergedGlobals, i32 0, i32 1) +; CHECK: @b = internal alias i32, getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @_MergedGlobals, i32 0, i32 1) @b = internal global i32 2 define void @use() { - ; CHECK: load i32, i32* getelementptr inbounds ({ i32, i32 }, { i32, i32 }* @_MergedGlobals, i32 0, i32 0) + ; CHECK: load i32, i32* getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @_MergedGlobals, i32 0, i32 0) %x = load i32, i32* @a - ; CHECK: load i32, i32* getelementptr inbounds ({ i32, i32 }, { i32, i32 }* @_MergedGlobals, i32 0, i32 1) + ; CHECK: load i32, i32* getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @_MergedGlobals, i32 0, i32 1) %y = load i32, i32* @b ret void } Index: llvm/trunk/test/Transforms/GlobalMerge/debug-info.ll =================================================================== --- llvm/trunk/test/Transforms/GlobalMerge/debug-info.ll +++ llvm/trunk/test/Transforms/GlobalMerge/debug-info.ll @@ -3,7 +3,7 @@ source_filename = "test/Transforms/GlobalMerge/debug-info.ll" target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" -; CHECK: @_MergedGlobals = private global { i32, i32 } { i32 1, i32 2 }, align 4, !dbg [[A:![0-9]+]], !dbg [[B:![0-9]+]] +; CHECK: @_MergedGlobals = private global <{ i32, i32 }> <{ i32 1, i32 2 }>, align 4, !dbg [[A:![0-9]+]], !dbg [[B:![0-9]+]] @a = internal global i32 1, !dbg !0 @b = internal global i32 2, !dbg !2