diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -433,6 +433,20 @@ return true; } +static bool CanDoGlobalSRA(GlobalVariable *GV) { + Constant *Init = GV->getInitializer(); + + if (isa(Init->getType())) { + // nothing to check + } else if (SequentialType *STy = dyn_cast(Init->getType())) { + if (STy->getNumElements() > 16 && GV->hasNUsesOrMore(16)) + return false; // It's not worth it. + } else + return false; + + return GlobalUsersSafeToSRA(GV); +} + /// Copy over the debug info for a variable to its SRA replacements. static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, uint64_t FragmentOffsetInBits, @@ -462,88 +476,94 @@ /// insert so that the caller can reprocess it. static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // Make sure this global only has simple uses that we can SRA. - if (!GlobalUsersSafeToSRA(GV)) + if (!CanDoGlobalSRA(GV)) return nullptr; assert(GV->hasLocalLinkage()); Constant *Init = GV->getInitializer(); Type *Ty = Init->getType(); - std::vector NewGlobals; - Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); + std::map NewGlobals; // Get the alignment of the global, either explicit or target-specific. unsigned StartAlignment = GV->getAlignment(); if (StartAlignment == 0) StartAlignment = DL.getABITypeAlignment(GV->getType()); - if (StructType *STy = dyn_cast(Ty)) { - unsigned NumElements = STy->getNumElements(); - NewGlobals.reserve(NumElements); - const StructLayout &Layout = *DL.getStructLayout(STy); - for (unsigned i = 0, e = NumElements; i != e; ++i) { - Constant *In = Init->getAggregateElement(i); - assert(In && "Couldn't get element of initializer?"); - GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, - GlobalVariable::InternalLinkage, - In, GV->getName()+"."+Twine(i), - GV->getThreadLocalMode(), - GV->getType()->getAddressSpace()); - NGV->setExternallyInitialized(GV->isExternallyInitialized()); - NGV->copyAttributesFrom(GV); - Globals.push_back(NGV); - NewGlobals.push_back(NGV); + // Loop over all users and create replacement variables for used aggregate + // elements. + for (User *GEP : GV->users()) { + assert(((isa(GEP) && cast(GEP)->getOpcode() == + Instruction::GetElementPtr) || + isa(GEP)) && + "NonGEP CE's are not SRAable!"); + + // Ignore the 1th operand, which has to be zero or else the program is quite + // broken (undefined). Get the 2nd operand, which is the structure or array + // index. + unsigned ElementIdx = cast(GEP->getOperand(2))->getZExtValue(); + if (NewGlobals.count(ElementIdx) == 1) + continue; // we`ve already created replacement variable + assert(NewGlobals.count(ElementIdx) == 0); + + Type *ElTy = nullptr; + if (StructType *STy = dyn_cast(Ty)) + ElTy = STy->getElementType(ElementIdx); + else if (SequentialType *STy = dyn_cast(Ty)) + ElTy = STy->getElementType(); + assert(ElTy); + + Constant *In = Init->getAggregateElement(ElementIdx); + assert(In && "Couldn't get element of initializer?"); + + GlobalVariable *NGV = new GlobalVariable( + ElTy, false, GlobalVariable::InternalLinkage, In, + GV->getName() + "." + Twine(ElementIdx), GV->getThreadLocalMode(), + GV->getType()->getAddressSpace()); + NGV->setExternallyInitialized(GV->isExternallyInitialized()); + NGV->copyAttributesFrom(GV); + NewGlobals.insert(std::make_pair(ElementIdx, NGV)); + + if (StructType *STy = dyn_cast(Ty)) { + const StructLayout &Layout = *DL.getStructLayout(STy); // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. - uint64_t FieldOffset = Layout.getElementOffset(i); + uint64_t FieldOffset = Layout.getElementOffset(ElementIdx); Align NewAlign(MinAlign(StartAlignment, FieldOffset)); - if (NewAlign > Align(DL.getABITypeAlignment(STy->getElementType(i)))) + if (NewAlign > + Align(DL.getABITypeAlignment(STy->getElementType(ElementIdx)))) NGV->setAlignment(NewAlign); // Copy over the debug info for the variable. uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType()); - uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(i); - transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, NumElements); - } - } else if (SequentialType *STy = dyn_cast(Ty)) { - unsigned NumElements = STy->getNumElements(); - if (NumElements > 16 && GV->hasNUsesOrMore(16)) - return nullptr; // It's not worth it. - NewGlobals.reserve(NumElements); - auto ElTy = STy->getElementType(); - uint64_t EltSize = DL.getTypeAllocSize(ElTy); - Align EltAlign(DL.getABITypeAlignment(ElTy)); - uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy); - for (unsigned i = 0, e = NumElements; i != e; ++i) { - Constant *In = Init->getAggregateElement(i); - assert(In && "Couldn't get element of initializer?"); - - GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, - GlobalVariable::InternalLinkage, - In, GV->getName()+"."+Twine(i), - GV->getThreadLocalMode(), - GV->getType()->getAddressSpace()); - NGV->setExternallyInitialized(GV->isExternallyInitialized()); - NGV->copyAttributesFrom(GV); - Globals.push_back(NGV); - NewGlobals.push_back(NGV); + uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx); + transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, + STy->getNumElements()); + } else if (SequentialType *STy = dyn_cast(Ty)) { + uint64_t EltSize = DL.getTypeAllocSize(ElTy); + Align EltAlign(DL.getABITypeAlignment(ElTy)); + uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy); // Calculate the known alignment of the field. If the original aggregate // had 256 byte alignment for example, something might depend on that: // propagate info to each field. - Align NewAlign(MinAlign(StartAlignment, EltSize * i)); + Align NewAlign(MinAlign(StartAlignment, EltSize * ElementIdx)); if (NewAlign > EltAlign) NGV->setAlignment(NewAlign); - transferSRADebugInfo(GV, NGV, FragmentSizeInBits * i, FragmentSizeInBits, - NumElements); + transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx, + FragmentSizeInBits, STy->getNumElements()); } } if (NewGlobals.empty()) return nullptr; + Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); + for (auto NewGlobalVar : NewGlobals) + Globals.push_back(NewGlobalVar.second); + LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n"); Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -559,11 +579,11 @@ // Ignore the 1th operand, which has to be zero or else the program is quite // broken (undefined). Get the 2nd operand, which is the structure or array // index. - unsigned Val = cast(GEP->getOperand(2))->getZExtValue(); - if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. + unsigned ElementIdx = cast(GEP->getOperand(2))->getZExtValue(); + assert(NewGlobals.count(ElementIdx) == 1); - Value *NewPtr = NewGlobals[Val]; - Type *NewTy = NewGlobals[Val]->getValueType(); + Value *NewPtr = NewGlobals[ElementIdx]; + Type *NewTy = NewGlobals[ElementIdx]->getValueType(); // Form a shorter GEP if needed. if (GEP->getNumOperands() > 3) { @@ -581,7 +601,8 @@ for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) Idxs.push_back(GEPI->getOperand(i)); NewPtr = GetElementPtrInst::Create( - NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(Val), GEPI); + NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(ElementIdx), + GEPI); } } GEP->replaceAllUsesWith(NewPtr); @@ -596,17 +617,8 @@ Globals.erase(GV); ++NumSRA; - // Loop over the new globals array deleting any globals that are obviously - // dead. This can arise due to scalarization of a structure or an array that - // has elements that are dead. - unsigned FirstGlobal = 0; - for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i) - if (NewGlobals[i]->use_empty()) { - Globals.erase(NewGlobals[i]); - if (FirstGlobal == i) ++FirstGlobal; - } - - return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr; + assert(NewGlobals.size() > 0); + return NewGlobals.begin()->second; } /// Return true if all users of the specified value will trap if the value is diff --git a/llvm/test/Transforms/GlobalOpt/long-compilation-global-sra.ll b/llvm/test/Transforms/GlobalOpt/long-compilation-global-sra.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/long-compilation-global-sra.ll @@ -0,0 +1,61 @@ +; RUN: opt %s --O0 -globalopt -S -o - + +; This is a regression test against very slow execution... +; In bad case it should fail by timeout. + +; Hand-reduced from this example. +; clang++ -mllvm -disable-llvm-optzns + +;#include +; +;namespace { +; char LargeBuffer[64 * 1024 * 1024]; +;} +; +;int main ( void ) { +; +; LargeBuffer[0] = 0; +; +; printf(""); +; +; return LargeBuffer[0] == 0; +;} + +; check that global array LargeBufferE was optimized out +; and local variable LargeBufferE.0 was used instead. + +; CHECK-NOT: global +; CHECK: main() +; CHECK-NEXT: LargeBufferE.0 +; CHECK-NOT: global + +; ModuleID = 'test.cpp' +source_filename = "test.cpp" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@LargeBufferE = internal global [67108864 x i8] zeroinitializer, align 16 +@.str = private unnamed_addr constant [1 x i8] c"\00", align 1 + +; Function Attrs: norecurse uwtable +define dso_local i32 @main() #0 { + %1 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + store i8 0, i8* getelementptr inbounds ([67108864 x i8], [67108864 x i8]* @LargeBufferE, i64 0, i64 0), align 16 + %2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0)) + %3 = load i8, i8* getelementptr inbounds ([67108864 x i8], [67108864 x i8]* @LargeBufferE, i64 0, i64 0), align 16 + %4 = sext i8 %3 to i32 + %5 = icmp eq i32 %4, 0 + %6 = zext i1 %5 to i32 + ret i32 %6 +} + +declare dso_local i32 @printf(i8*, ...) #0 + +attributes #0 = { norecurse uwtable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.0 "}