diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -78,7 +78,6 @@ STATISTIC(NumMarked , "Number of globals marked constant"); STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); -STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); STATISTIC(NumDeleted , "Number of globals deleted"); STATISTIC(NumGlobUses , "Number of global uses devirtualized"); @@ -996,9 +995,9 @@ /// Scan the use-list of V checking to make sure that there are no complex uses /// of V. We permit simple things like dereferencing the pointer, but not /// storing through the address, unless it is to the specified global. -static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, - const GlobalVariable *GV, - SmallPtrSetImpl &PHIs) { +static bool +valueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, + const GlobalVariable *GV) { for (const User *U : V->users()) { const Instruction *Inst = cast(U); @@ -1012,492 +1011,17 @@ continue; // Otherwise, storing through it, or storing into GV... fine. } - // Must index into the array and into the struct. - if (isa(Inst) && Inst->getNumOperands() >= 3) { - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) - return false; - continue; - } - - if (const PHINode *PN = dyn_cast(Inst)) { - // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI - // cycles. - if (PHIs.insert(PN).second) - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs)) - return false; - continue; - } - if (const BitCastInst *BCI = dyn_cast(Inst)) { - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) - return false; - continue; - } - - return false; - } - return true; -} - -/// The Alloc pointer is stored into GV somewhere. Transform all uses of the -/// allocation into loads from the global and uses of the resultant pointer. -/// Further, delete the store into GV. This assumes that these value pass the -/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. -static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, - GlobalVariable *GV) { - while (!Alloc->use_empty()) { - Instruction *U = cast(*Alloc->user_begin()); - Instruction *InsertPt = U; - if (StoreInst *SI = dyn_cast(U)) { - // If this is the store of the allocation into the global, remove it. - if (SI->getOperand(1) == GV) { - SI->eraseFromParent(); - continue; - } - } else if (PHINode *PN = dyn_cast(U)) { - // Insert the load in the corresponding predecessor, not right before the - // PHI. - InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator(); - } else if (isa(U)) { - // Must be bitcast between the malloc and store to initialize the global. - ReplaceUsesOfMallocWithGlobal(U, GV); - U->eraseFromParent(); - continue; - } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { - // If this is a "GEP bitcast" and the user is a store to the global, then - // just process it as a bitcast. - if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse()) - if (StoreInst *SI = dyn_cast(GEPI->user_back())) - if (SI->getOperand(1) == GV) { - // Must be bitcast GEP between the malloc and store to initialize - // the global. - ReplaceUsesOfMallocWithGlobal(GEPI, GV); - GEPI->eraseFromParent(); - continue; - } - } - - // Insert a load from the global, and use it instead of the malloc. - Value *NL = - new LoadInst(GV->getValueType(), GV, GV->getName() + ".val", InsertPt); - U->replaceUsesOfWith(Alloc, NL); - } -} - -/// Verify that all uses of V (a load, or a phi of a load) are simple enough to -/// perform heap SRA on. This permits GEP's that index through the array and -/// struct field, icmps of null, and PHIs. -static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, - SmallPtrSetImpl &LoadUsingPHIs, - SmallPtrSetImpl &LoadUsingPHIsPerLoad) { - // We permit two users of the load: setcc comparing against the null - // pointer, and a getelementptr of a specific form. - for (const User *U : V->users()) { - const Instruction *UI = cast(U); - - // Comparison against null is ok. - if (const ICmpInst *ICI = dyn_cast(UI)) { - if (!isa(ICI->getOperand(1))) - return false; - continue; - } - - // getelementptr is also ok, but only a simple form. - if (const GetElementPtrInst *GEPI = dyn_cast(UI)) { - // Must index into the array and into the struct. - if (GEPI->getNumOperands() < 3) + if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV)) return false; - - // Otherwise the GEP is ok. continue; } - if (const PHINode *PN = dyn_cast(UI)) { - if (!LoadUsingPHIsPerLoad.insert(PN).second) - // This means some phi nodes are dependent on each other. - // Avoid infinite looping! - return false; - if (!LoadUsingPHIs.insert(PN).second) - // If we have already analyzed this PHI, then it is safe. - continue; - - // Make sure all uses of the PHI are simple enough to transform. - if (!LoadUsesSimpleEnoughForHeapSRA(PN, - LoadUsingPHIs, LoadUsingPHIsPerLoad)) - return false; - - continue; - } - - // Otherwise we don't know what this is, not ok. return false; } - - return true; -} - -/// If all users of values loaded from GV are simple enough to perform HeapSRA, -/// return true. -static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, - Instruction *StoredVal) { - SmallPtrSet LoadUsingPHIs; - SmallPtrSet LoadUsingPHIsPerLoad; - for (const User *U : GV->users()) - if (const LoadInst *LI = dyn_cast(U)) { - if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, - LoadUsingPHIsPerLoad)) - return false; - LoadUsingPHIsPerLoad.clear(); - } - - // If we reach here, we know that all uses of the loads and transitive uses - // (through PHI nodes) are simple enough to transform. However, we don't know - // that all inputs the to the PHI nodes are in the same equivalence sets. - // Check to verify that all operands of the PHIs are either PHIS that can be - // transformed, loads from GV, or MI itself. - for (const PHINode *PN : LoadUsingPHIs) { - for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { - Value *InVal = PN->getIncomingValue(op); - - // PHI of the stored value itself is ok. - if (InVal == StoredVal) continue; - - if (const PHINode *InPN = dyn_cast(InVal)) { - // One of the PHIs in our set is (optimistically) ok. - if (LoadUsingPHIs.count(InPN)) - continue; - return false; - } - - // Load from GV is ok. - if (const LoadInst *LI = dyn_cast(InVal)) - if (LI->getOperand(0) == GV) - continue; - - // UNDEF? NULL? - - // Anything else is rejected. - return false; - } - } - return true; } -static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, - DenseMap> &InsertedScalarizedValues, - std::vector> &PHIsToRewrite) { - std::vector &FieldVals = InsertedScalarizedValues[V]; - - if (FieldNo >= FieldVals.size()) - FieldVals.resize(FieldNo+1); - - // If we already have this value, just reuse the previously scalarized - // version. - if (Value *FieldVal = FieldVals[FieldNo]) - return FieldVal; - - // Depending on what instruction this is, we have several cases. - Value *Result; - if (LoadInst *LI = dyn_cast(V)) { - // This is a scalarized version of the load from the global. Just create - // a new Load of the scalarized global. - Value *V = GetHeapSROAValue(LI->getOperand(0), FieldNo, - InsertedScalarizedValues, PHIsToRewrite); - Result = new LoadInst(V->getType()->getPointerElementType(), V, - LI->getName() + ".f" + Twine(FieldNo), LI); - } else { - PHINode *PN = cast(V); - // PN's type is pointer to struct. Make a new PHI of pointer to struct - // field. - - PointerType *PTy = cast(PN->getType()); - StructType *ST = cast(PTy->getElementType()); - - unsigned AS = PTy->getAddressSpace(); - PHINode *NewPN = - PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS), - PN->getNumIncomingValues(), - PN->getName()+".f"+Twine(FieldNo), PN); - Result = NewPN; - PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); - } - - return FieldVals[FieldNo] = Result; -} - -/// Given a load instruction and a value derived from the load, rewrite the -/// derived value to use the HeapSRoA'd load. -static void RewriteHeapSROALoadUser(Instruction *LoadUser, - DenseMap> &InsertedScalarizedValues, - std::vector> &PHIsToRewrite) { - // If this is a comparison against null, handle it. - if (ICmpInst *SCI = dyn_cast(LoadUser)) { - assert(isa(SCI->getOperand(1))); - // If we have a setcc of the loaded pointer, we can use a setcc of any - // field. - Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, - InsertedScalarizedValues, PHIsToRewrite); - - Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, - Constant::getNullValue(NPtr->getType()), - SCI->getName()); - SCI->replaceAllUsesWith(New); - SCI->eraseFromParent(); - return; - } - - // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' - if (GetElementPtrInst *GEPI = dyn_cast(LoadUser)) { - assert(GEPI->getNumOperands() >= 3 && isa(GEPI->getOperand(2)) - && "Unexpected GEPI!"); - - // Load the pointer for this field. - unsigned FieldNo = cast(GEPI->getOperand(2))->getZExtValue(); - Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, - InsertedScalarizedValues, PHIsToRewrite); - - // Create the new GEP idx vector. - SmallVector GEPIdx; - GEPIdx.push_back(GEPI->getOperand(1)); - GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); - - Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx, - GEPI->getName(), GEPI); - GEPI->replaceAllUsesWith(NGEPI); - GEPI->eraseFromParent(); - return; - } - - // Recursively transform the users of PHI nodes. This will lazily create the - // PHIs that are needed for individual elements. Keep track of what PHIs we - // see in InsertedScalarizedValues so that we don't get infinite loops (very - // antisocial). If the PHI is already in InsertedScalarizedValues, it has - // already been seen first by another load, so its uses have already been - // processed. - PHINode *PN = cast(LoadUser); - if (!InsertedScalarizedValues.insert(std::make_pair(PN, - std::vector())).second) - return; - - // If this is the first time we've seen this PHI, recursively process all - // users. - for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) { - Instruction *User = cast(*UI++); - RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); - } -} - -/// We are performing Heap SRoA on a global. Ptr is a value loaded from the -/// global. Eliminate all uses of Ptr, making them use FieldGlobals instead. -/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA. -static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, - DenseMap> &InsertedScalarizedValues, - std::vector > &PHIsToRewrite) { - for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) { - Instruction *User = cast(*UI++); - RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); - } - - if (Load->use_empty()) { - Load->eraseFromParent(); - InsertedScalarizedValues.erase(Load); - } -} - -/// CI is an allocation of an array of structures. Break it up into multiple -/// allocations of arrays of the fields. -static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, - Value *NElems, const DataLayout &DL, - const TargetLibraryInfo *TLI) { - LLVM_DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI - << '\n'); - Type *MAT = getMallocAllocatedType(CI, TLI); - StructType *STy = cast(MAT); - - // There is guaranteed to be at least one use of the malloc (storing - // it into GV). If there are other uses, change them to be uses of - // the global to simplify later code. This also deletes the store - // into GV. - ReplaceUsesOfMallocWithGlobal(CI, GV); - - // Okay, at this point, there are no users of the malloc. Insert N - // new mallocs at the same place as CI, and N globals. - std::vector FieldGlobals; - std::vector FieldMallocs; - - SmallVector OpBundles; - CI->getOperandBundlesAsDefs(OpBundles); - - unsigned AS = GV->getType()->getPointerAddressSpace(); - for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ - Type *FieldTy = STy->getElementType(FieldNo); - PointerType *PFieldTy = PointerType::get(FieldTy, AS); - - GlobalVariable *NGV = new GlobalVariable( - *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo), - nullptr, GV->getThreadLocalMode()); - NGV->copyAttributesFrom(GV); - FieldGlobals.push_back(NGV); - - unsigned TypeSize = DL.getTypeAllocSize(FieldTy); - if (StructType *ST = dyn_cast(FieldTy)) - TypeSize = DL.getStructLayout(ST)->getSizeInBytes(); - Type *IntPtrTy = DL.getIntPtrType(CI->getType()); - Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, - ConstantInt::get(IntPtrTy, TypeSize), - NElems, OpBundles, nullptr, - CI->getName() + ".f" + Twine(FieldNo)); - FieldMallocs.push_back(NMI); - new StoreInst(NMI, NGV, CI); - } - - // The tricky aspect of this transformation is handling the case when malloc - // fails. In the original code, malloc failing would set the result pointer - // of malloc to null. In this case, some mallocs could succeed and others - // could fail. As such, we emit code that looks like this: - // F0 = malloc(field0) - // F1 = malloc(field1) - // F2 = malloc(field2) - // if (F0 == 0 || F1 == 0 || F2 == 0) { - // if (F0) { free(F0); F0 = 0; } - // if (F1) { free(F1); F1 = 0; } - // if (F2) { free(F2); F2 = 0; } - // } - // The malloc can also fail if its argument is too large. - Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0); - Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0), - ConstantZero, "isneg"); - for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { - Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], - Constant::getNullValue(FieldMallocs[i]->getType()), - "isnull"); - RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI); - } - - // Split the basic block at the old malloc. - BasicBlock *OrigBB = CI->getParent(); - BasicBlock *ContBB = - OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont"); - - // Create the block to check the first condition. Put all these blocks at the - // end of the function as they are unlikely to be executed. - BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), - "malloc_ret_null", - OrigBB->getParent()); - - // Remove the uncond branch from OrigBB to ContBB, turning it into a cond - // branch on RunningOr. - OrigBB->getTerminator()->eraseFromParent(); - BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); - - // Within the NullPtrBlock, we need to emit a comparison and branch for each - // pointer, because some may be null while others are not. - for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { - Value *GVVal = - new LoadInst(cast(FieldGlobals[i])->getValueType(), - FieldGlobals[i], "tmp", NullPtrBlock); - Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, - Constant::getNullValue(GVVal->getType())); - BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", - OrigBB->getParent()); - BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next", - OrigBB->getParent()); - Instruction *BI = BranchInst::Create(FreeBlock, NextBlock, - Cmp, NullPtrBlock); - - // Fill in FreeBlock. - CallInst::CreateFree(GVVal, OpBundles, BI); - new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], - FreeBlock); - BranchInst::Create(NextBlock, FreeBlock); - - NullPtrBlock = NextBlock; - } - - BranchInst::Create(ContBB, NullPtrBlock); - - // CI is no longer needed, remove it. - CI->eraseFromParent(); - - /// As we process loads, if we can't immediately update all uses of the load, - /// keep track of what scalarized loads are inserted for a given load. - DenseMap> InsertedScalarizedValues; - InsertedScalarizedValues[GV] = FieldGlobals; - - std::vector> PHIsToRewrite; - - // Okay, the malloc site is completely handled. All of the uses of GV are now - // loads, and all uses of those loads are simple. Rewrite them to use loads - // of the per-field globals instead. - for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) { - Instruction *User = cast(*UI++); - - if (LoadInst *LI = dyn_cast(User)) { - RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); - continue; - } - - // Must be a store of null. - StoreInst *SI = cast(User); - assert(isa(SI->getOperand(0)) && - "Unexpected heap-sra user!"); - - // Insert a store of null into each global. - for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { - Type *ValTy = cast(FieldGlobals[i])->getValueType(); - Constant *Null = Constant::getNullValue(ValTy); - new StoreInst(Null, FieldGlobals[i], SI); - } - // Erase the original store. - SI->eraseFromParent(); - } - - // While we have PHIs that are interesting to rewrite, do it. - while (!PHIsToRewrite.empty()) { - PHINode *PN = PHIsToRewrite.back().first; - unsigned FieldNo = PHIsToRewrite.back().second; - PHIsToRewrite.pop_back(); - PHINode *FieldPN = cast(InsertedScalarizedValues[PN][FieldNo]); - assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); - - // Add all the incoming values. This can materialize more phis. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *InVal = PN->getIncomingValue(i); - InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, - PHIsToRewrite); - FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); - } - } - - // Drop all inter-phi links and any loads that made it this far. - for (DenseMap>::iterator - I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); - I != E; ++I) { - if (PHINode *PN = dyn_cast(I->first)) - PN->dropAllReferences(); - else if (LoadInst *LI = dyn_cast(I->first)) - LI->dropAllReferences(); - } - - // Delete all the phis and loads now that inter-references are dead. - for (DenseMap>::iterator - I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); - I != E; ++I) { - if (PHINode *PN = dyn_cast(I->first)) - PN->eraseFromParent(); - else if (LoadInst *LI = dyn_cast(I->first)) - LI->eraseFromParent(); - } - - // The old global is now dead, remove it. - GV->eraseFromParent(); - - ++NumHeapSRA; - return cast(FieldGlobals[0]); -} - /// This function is called when we see a pointer global variable with a single /// value stored it that is a malloc or cast of malloc. static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, @@ -1520,11 +1044,9 @@ // We can't optimize this if the malloc itself is used in a complex way, // for example, being stored into multiple globals. This allows the - // malloc to be stored into the specified global, loaded icmp'd, and - // GEP'd. These are all things we could transform to using the global - // for. - SmallPtrSet PHIs; - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) + // malloc to be stored into the specified global, loaded icmp'd. + // These are all things we could transform to using the global for. + if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV)) return false; // If we have a global that is only initialized with a fixed size malloc, @@ -1545,54 +1067,6 @@ return true; } - // If the allocation is an array of structures, consider transforming this - // into multiple malloc'd arrays, one for each field. This is basically - // SRoA for malloc'd memory. - - if (Ordering != AtomicOrdering::NotAtomic) - return false; - - // If this is an allocation of a fixed size array of structs, analyze as a - // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) - if (ArrayType *AT = dyn_cast(AllocTy)) - AllocTy = AT->getElementType(); - - StructType *AllocSTy = dyn_cast(AllocTy); - if (!AllocSTy) - return false; - - // This the structure has an unreasonable number of fields, leave it - // alone. - if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { - - // If this is a fixed size array, transform the Malloc to be an alloc of - // structs. malloc [100 x struct],1 -> malloc struct, 100 - if (ArrayType *AT = dyn_cast(getMallocAllocatedType(CI, TLI))) { - Type *IntPtrTy = DL.getIntPtrType(CI->getType()); - unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes(); - Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); - Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); - SmallVector OpBundles; - CI->getOperandBundlesAsDefs(OpBundles); - Instruction *Malloc = - CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements, - OpBundles, nullptr, CI->getName()); - Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); - CI->replaceAllUsesWith(Cast); - CI->eraseFromParent(); - if (BitCastInst *BCI = dyn_cast(Malloc)) - CI = cast(BCI->getOperand(0)); - else - CI = cast(Malloc); - } - - PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL, - TLI); - return true; - } - return false; } diff --git a/llvm/test/Transforms/GlobalOpt/MallocSROA-section.ll b/llvm/test/Transforms/GlobalOpt/MallocSROA-section.ll --- a/llvm/test/Transforms/GlobalOpt/MallocSROA-section.ll +++ b/llvm/test/Transforms/GlobalOpt/MallocSROA-section.ll @@ -1,8 +1,5 @@ ; RUN: opt -globalopt -S < %s | FileCheck %s -; CHECK: @Y.f0 -; CHECK: section ".foo" -; CHECK: @Y.f1 -; CHECK: section ".foo" +; CHECK: @Y = {{.*}} section ".foo" %struct.xyz = type { double, i32 } diff --git a/llvm/test/Transforms/GlobalOpt/heap-sra-1.ll b/llvm/test/Transforms/GlobalOpt/heap-sra-1.ll --- a/llvm/test/Transforms/GlobalOpt/heap-sra-1.ll +++ b/llvm/test/Transforms/GlobalOpt/heap-sra-1.ll @@ -1,10 +1,10 @@ ; RUN: opt < %s -globalopt -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" +;; Heap SROA has been removed. This tests we don't perform heap SROA. +; CHECK: @X = %struct.foo = type { i32, i32 } @X = internal global %struct.foo* null -; CHECK: @X.f0 -; CHECK: @X.f1 define void @bar(i64 %Size) nounwind noinline { entry: diff --git a/llvm/test/Transforms/GlobalOpt/heap-sra-2.ll b/llvm/test/Transforms/GlobalOpt/heap-sra-2.ll --- a/llvm/test/Transforms/GlobalOpt/heap-sra-2.ll +++ b/llvm/test/Transforms/GlobalOpt/heap-sra-2.ll @@ -1,10 +1,10 @@ ; RUN: opt < %s -globalopt -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" +;; Heap SROA has been removed. This tests we don't perform heap SROA. +; CHECK: @X = %struct.foo = type { i32, i32 } @X = internal global %struct.foo* null ; <%struct.foo**> [#uses=2] -; CHECK: @X.f0 -; CHECK: @X.f1 define void @bar(i32 %Size) nounwind noinline { entry: diff --git a/llvm/test/Transforms/GlobalOpt/heap-sra-3-no-null-opt.ll b/llvm/test/Transforms/GlobalOpt/heap-sra-3-no-null-opt.ll deleted file mode 100644 --- a/llvm/test/Transforms/GlobalOpt/heap-sra-3-no-null-opt.ll +++ /dev/null @@ -1,41 +0,0 @@ -; RUN: opt < %s -globalopt -S | FileCheck %s -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" - -%struct.foo = type { i32, i32 } -@X = internal global %struct.foo* null -; CHECK: @X -; CHECK-NOT: @X.f0 - -define void @bar(i64 %Size) nounwind noinline #0 { -entry: - %mallocsize = mul i64 8, %Size ; [#uses=1] -; CHECK: mul i64 8, %Size - %malloccall = tail call i8* @malloc(i64 %mallocsize) ; [#uses=1] - %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1] - store %struct.foo* %.sub, %struct.foo** @X, align 4 - ret void -} - -declare noalias i8* @malloc(i64) - -define i32 @baz() nounwind readonly noinline #0 { -bb1.thread: -; CHECK: load %struct.foo*, %struct.foo** @X, align 4 - %0 = load %struct.foo*, %struct.foo** @X, align 4 - br label %bb1 - -bb1: ; preds = %bb1, %bb1.thread - %i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ] - %sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ] - %1 = getelementptr %struct.foo, %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0 - %2 = load i32, i32* %1, align 4 - %3 = add i32 %2, %sum.0.reg2mem.0 - %indvar.next = add i32 %i.0.reg2mem.0, 1 - %exitcond = icmp eq i32 %indvar.next, 1200 - br i1 %exitcond, label %bb2, label %bb1 - -bb2: ; preds = %bb1 - ret i32 %3 -} - -attributes #0 = { null_pointer_is_valid } diff --git a/llvm/test/Transforms/GlobalOpt/heap-sra-3.ll b/llvm/test/Transforms/GlobalOpt/heap-sra-3.ll deleted file mode 100644 --- a/llvm/test/Transforms/GlobalOpt/heap-sra-3.ll +++ /dev/null @@ -1,46 +0,0 @@ -; RUN: opt < %s -globalopt -S | FileCheck %s -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" - - %struct.foo = type { i32, i32 } -@X = internal global %struct.foo* null -; CHECK: @X.f0 -; CHECK: @X.f1 - -define void @bar(i64 %Size) nounwind noinline { -entry: - %mallocsize = mul i64 8, %Size ; [#uses=1] -; CHECK: mul i64 %Size, 4 - %malloccall = tail call i8* @malloc(i64 %mallocsize) ; [#uses=1] - %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1] - store %struct.foo* %.sub, %struct.foo** @X, align 4 - ret void -} - -declare noalias i8* @malloc(i64) - -define i32 @baz() nounwind readonly noinline { -bb1.thread: - %0 = load %struct.foo*, %struct.foo** @X, align 4 - br label %bb1 - -bb1: ; preds = %bb1, %bb1.thread - %i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ] - %sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ] - %1 = getelementptr %struct.foo, %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0 - %2 = load i32, i32* %1, align 4 - %3 = add i32 %2, %sum.0.reg2mem.0 - %indvar.next = add i32 %i.0.reg2mem.0, 1 - %exitcond = icmp eq i32 %indvar.next, 1200 - br i1 %exitcond, label %bb2, label %bb1 - -bb2: ; preds = %bb1 - ret i32 %3 -} - -define void @bam(i64 %Size) nounwind noinline #0 { -entry: - %0 = load %struct.foo*, %struct.foo** @X, align 4 - ret void -} - -attributes #0 = { null_pointer_is_valid } diff --git a/llvm/test/Transforms/GlobalOpt/heap-sra-4-no-null-opt.ll b/llvm/test/Transforms/GlobalOpt/heap-sra-4-no-null-opt.ll deleted file mode 100644 --- a/llvm/test/Transforms/GlobalOpt/heap-sra-4-no-null-opt.ll +++ /dev/null @@ -1,44 +0,0 @@ -; RUN: opt < %s -globalopt -S | FileCheck %s -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" - -%struct.foo = type { i32, i32 } - -@X = internal global %struct.foo* null -; CHECK: @X -; CHECK-NOT: @X.f0 - -define void @bar(i64 %Size) nounwind noinline #0 { -entry: - %mallocsize = shl i64 %Size, 3 ; [#uses=1] - %malloccall = tail call i8* @malloc(i64 %mallocsize) ; [#uses=1] -; CHECK: shl i64 %Size, 3 - %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1] - store %struct.foo* %.sub, %struct.foo** @X, align 4 - ret void -} - -declare noalias i8* @malloc(i64) - -define i32 @baz() nounwind readonly noinline #0 { -; CHECK-LABEL: @baz( -bb1.thread: -; CHECK: load %struct.foo*, %struct.foo** @X, align 4 - %0 = load %struct.foo*, %struct.foo** @X, align 4 - br label %bb1 - -bb1: ; preds = %bb1, %bb1.thread - %i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ] - %sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ] - %1 = getelementptr %struct.foo, %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0 - %2 = load i32, i32* %1, align 4 -; CHECK: load i32, i32* %1, align 4 - %3 = add i32 %2, %sum.0.reg2mem.0 - %indvar.next = add i32 %i.0.reg2mem.0, 1 - %exitcond = icmp eq i32 %indvar.next, 1200 - br i1 %exitcond, label %bb2, label %bb1 - -bb2: ; preds = %bb1 - ret i32 %3 -} - -attributes #0 = { null_pointer_is_valid } diff --git a/llvm/test/Transforms/GlobalOpt/heap-sra-4.ll b/llvm/test/Transforms/GlobalOpt/heap-sra-4.ll deleted file mode 100644 --- a/llvm/test/Transforms/GlobalOpt/heap-sra-4.ll +++ /dev/null @@ -1,47 +0,0 @@ -; RUN: opt < %s -globalopt -S | FileCheck %s -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" - - %struct.foo = type { i32, i32 } -@X = internal global %struct.foo* null -; CHECK: @X.f0 -; CHECK: @X.f1 - -define void @bar(i64 %Size) nounwind noinline { -entry: - %mallocsize = shl i64 %Size, 3 ; [#uses=1] - %malloccall = tail call i8* @malloc(i64 %mallocsize) ; [#uses=1] -; CHECK: mul i64 %Size, 4 - %.sub = bitcast i8* %malloccall to %struct.foo* ; <%struct.foo*> [#uses=1] - store %struct.foo* %.sub, %struct.foo** @X, align 4 - ret void -} - -declare noalias i8* @malloc(i64) - -define i32 @baz() nounwind readonly noinline { -bb1.thread: - %0 = load %struct.foo*, %struct.foo** @X, align 4 - br label %bb1 - -bb1: ; preds = %bb1, %bb1.thread - %i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ] - %sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ] - %1 = getelementptr %struct.foo, %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0 - %2 = load i32, i32* %1, align 4 - %3 = add i32 %2, %sum.0.reg2mem.0 - %indvar.next = add i32 %i.0.reg2mem.0, 1 - %exitcond = icmp eq i32 %indvar.next, 1200 - br i1 %exitcond, label %bb2, label %bb1 - -bb2: ; preds = %bb1 - ret i32 %3 -} - -define void @bam(i64 %Size) nounwind noinline #0 { -entry: - %0 = load %struct.foo*, %struct.foo** @X, align 4 - ret void -} - -attributes #0 = { null_pointer_is_valid } - diff --git a/llvm/test/Transforms/GlobalOpt/heap-sra-phi.ll b/llvm/test/Transforms/GlobalOpt/heap-sra-phi.ll --- a/llvm/test/Transforms/GlobalOpt/heap-sra-phi.ll +++ b/llvm/test/Transforms/GlobalOpt/heap-sra-phi.ll @@ -1,8 +1,9 @@ ; RUN: opt < %s -globalopt -S | FileCheck %s -; CHECK: tmp.f1 = phi i32* -; CHECK: tmp.f0 = phi i32* + target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" +;; Heap SROA has been removed. This tests we don't perform heap SROA. +; CHECK: @X = %struct.foo = type { i32, i32 } @X = internal global %struct.foo* null ; <%struct.foo**> [#uses=2]