diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -337,107 +337,68 @@
   return Changed;
 }
 
-static bool isSafeSROAElementUse(Value *V);
-
-/// Return true if the specified GEP is a safe user of a derived
-/// expression from a global that we want to SROA.
-static bool isSafeSROAGEP(User *U) {
-  // Check to see if this ConstantExpr GEP is SRA'able. In particular, we
-  // don't like < 3 operand CE's, and we don't like non-constant integer
-  // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some
-  // value of C.
-  if (U->getNumOperands() < 3 || !isa<ConstantInt>(U->getOperand(1)) ||
-      !cast<ConstantInt>(U->getOperand(1))->isNullValue())
-    return false;
-
-  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
-  ++GEPI; // Skip over the pointer index.
-
-  // For all other level we require that the indices are constant and inrange.
-  // In particular, consider: A[0][i]. We cannot know that the user isn't doing
-  // invalid things like allowing i to index an out-of-range subscript that
-  // accesses A[1]. This can also happen between different members of a struct
-  // in llvm IR.
-  for (; GEPI != E; ++GEPI) {
-    if (GEPI.isStruct())
+/// Look at all uses of the global and determine which (offset, type) pairs it
+/// can be split into.
+static bool collectSRATypes(DenseMap<uint64_t, Type *> &Types, GlobalValue *GV,
+                            const DataLayout &DL) {
+  SmallVector<Use *, 16> Worklist;
+  SmallPtrSet<Use *, 16> Visited;
+  auto AppendUses = [&](Value *V) {
+    for (Use &U : V->uses())
+      if (Visited.insert(&U).second)
+        Worklist.push_back(&U);
+  };
+  AppendUses(GV);
+  while (!Worklist.empty()) {
+    Use *U = Worklist.pop_back_val();
+    User *V = U->getUser();
+    if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V)) {
+      AppendUses(V);
       continue;
+    }
 
-    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
-    if (!IdxVal || (GEPI.isBoundedSequential() &&
-                    IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
-      return false;
-  }
-
-  return llvm::all_of(U->users(), isSafeSROAElementUse);
-}
-
-/// Return true if the specified instruction is a safe user of a derived
-/// expression from a global that we want to SROA.
-static bool isSafeSROAElementUse(Value *V) {
-  // We might have a dead and dangling constant hanging off of here.
-  if (Constant *C = dyn_cast<Constant>(V))
-    return isSafeToDestroyConstant(C);
-
-  Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return false;
+    if (auto *GEP = dyn_cast<GEPOperator>(V)) {
+      if (!GEP->hasAllConstantIndices())
+        return false;
+      AppendUses(V);
+      continue;
+    }
 
-  // Loads are ok.
-  if (isa<LoadInst>(I)) return true;
+    if (Value *Ptr = getLoadStorePointerOperand(V)) {
+      // This is storing the global address into somewhere, not storing into
+      // the global.
+      if (isa<StoreInst>(V) && U->getOperandNo() == 0)
+        return false;
 
-  // Stores *to* the pointer are ok.
-  if (StoreInst *SI = dyn_cast<StoreInst>(I))
-    return SI->getOperand(0) != V;
+      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
+                                                   /* AllowNonInbounds */ true);
+      if (Ptr != GV || Offset.getActiveBits() >= 64)
+        return false;
 
-  // Otherwise, it must be a GEP. Check it and its users are safe to SRA.
-  return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I);
-}
+      // TODO: We currently require that all accesses at a given offset must
+      // use the same type. This could be relaxed.
+      Type *Ty = getLoadStoreType(V);
+      auto It = Types.try_emplace(Offset.getZExtValue(), Ty).first;
+      if (Ty != It->second)
+        return false;
+      continue;
+    }
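For illustration, a minimal module (hypothetical, not part of this patch) that the new use walk accepts: the two constant-offset accesses below record the pairs {0 -> i32, 8 -> i64} for @g. A GEP with a non-constant index, or a store that captures the address of @g itself, would instead make collectSRATypes return false.

    @g = internal global { i32, [4 x i8], i64 } zeroinitializer

    define i64 @example() {
      ; Offset 0, accessed as i32.
      store i32 1, i32* getelementptr inbounds ({ i32, [4 x i8], i64 }, { i32, [4 x i8], i64 }* @g, i64 0, i32 0)
      ; Offset 8, accessed as i64.
      %v = load i64, i64* getelementptr inbounds ({ i32, [4 x i8], i64 }, { i32, [4 x i8], i64 }* @g, i64 0, i32 2)
      ret i64 %v
    }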
+      // (continued from the load/store handling above)
 
-/// Look at all uses of the global and decide whether it is safe for us to
-/// perform this transformation.
-static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
-  for (User *U : GV->users()) {
-    // The user of the global must be a GEP Inst or a ConstantExpr GEP.
-    if (!isa<GetElementPtrInst>(U) &&
-        (!isa<ConstantExpr>(U) ||
-         cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
-      return false;
+    // Ignore dead constant users.
+    if (auto *C = dyn_cast<Constant>(V)) {
+      if (!isSafeToDestroyConstant(C))
+        return false;
+      continue;
+    }
 
-    // Check the gep and it's users are safe to SRA
-    if (!isSafeSROAGEP(U))
-      return false;
+    // Unknown user.
+    return false;
   }
 
   return true;
 }
 
-static bool IsSRASequential(Type *T) {
-  return isa<ArrayType>(T) || isa<VectorType>(T);
-}
-static uint64_t GetSRASequentialNumElements(Type *T) {
-  if (ArrayType *AT = dyn_cast<ArrayType>(T))
-    return AT->getNumElements();
-  return cast<FixedVectorType>(T)->getNumElements();
-}
-static Type *GetSRASequentialElementType(Type *T) {
-  if (ArrayType *AT = dyn_cast<ArrayType>(T))
-    return AT->getElementType();
-  return cast<FixedVectorType>(T)->getElementType();
-}
-static bool CanDoGlobalSRA(GlobalVariable *GV) {
-  Constant *Init = GV->getInitializer();
-
-  if (isa<StructType>(Init->getType())) {
-    // nothing to check
-  } else if (IsSRASequential(Init->getType())) {
-    if (GetSRASequentialNumElements(Init->getType()) > 16 &&
-        GV->hasNUsesOrMore(16))
-      return false; // It's not worth it.
-  } else
-    return false;
-
-  return GlobalUsersSafeToSRA(GV);
-}
-
 /// Copy over the debug info for a variable to its SRA replacements.
 static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
                                  uint64_t FragmentOffsetInBits,
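The SRAGlobal rewrite below consumes these (offset, type) pairs. As a sketch of the overlap check it performs (hypothetical IR, not from the patch): the two stores below record {0 -> i64, 4 -> i32}; after sorting by offset, offset 4 is smaller than 0 + 8 (the alloc size of i64), so the types overlap and the split is abandoned.

    @g = internal global [2 x i32] zeroinitializer

    define void @overlap() {
      store i64 0, i64* bitcast ([2 x i32]* @g to i64*)
      store i32 1, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @g, i64 0, i64 1)
      ret void
    }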
@@ -468,160 +429,140 @@
 /// transformation is safe already. We return the first global variable we
 /// insert so that the caller can reprocess it.
 static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
-  // Make sure this global only has simple uses that we can SRA.
-  if (!CanDoGlobalSRA(GV))
+  assert(GV->hasLocalLinkage());
+
+  // Collect types to split into.
+  DenseMap<uint64_t, Type *> Types;
+  if (!collectSRATypes(Types, GV, DL) || Types.empty())
     return nullptr;
 
-  assert(GV->hasLocalLinkage());
-  Constant *Init = GV->getInitializer();
-  Type *Ty = Init->getType();
-  uint64_t VarSize = DL.getTypeSizeInBits(Ty);
+  // Make sure we don't SRA back to the same type.
+  if (Types.size() == 1 && Types.begin()->second == GV->getValueType())
+    return nullptr;
 
-  std::map<unsigned, GlobalVariable *> NewGlobals;
+  // Don't perform SRA if we would have to split into many globals.
+  if (Types.size() > 16)
+    return nullptr;
 
-  // Get the alignment of the global, either explicit or target-specific.
-  Align StartAlignment =
-      DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getType());
-
-  // Loop over all users and create replacement variables for used aggregate
-  // elements.
-  for (User *GEP : GV->users()) {
-    assert(((isa<ConstantExpr>(GEP) && cast<ConstantExpr>(GEP)->getOpcode() ==
-                                           Instruction::GetElementPtr) ||
-            isa<GetElementPtrInst>(GEP)) &&
-           "NonGEP CE's are not SRAable!");
-
-    // Ignore the 1th operand, which has to be zero or else the program is quite
-    // broken (undefined). Get the 2nd operand, which is the structure or array
-    // index.
-    unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
-    if (NewGlobals.count(ElementIdx) == 1)
-      continue; // we`ve already created replacement variable
-    assert(NewGlobals.count(ElementIdx) == 0);
-
-    Type *ElTy = nullptr;
-    if (StructType *STy = dyn_cast<StructType>(Ty))
-      ElTy = STy->getElementType(ElementIdx);
-    else
-      ElTy = GetSRASequentialElementType(Ty);
-    assert(ElTy);
+  // Sort by offset.
+  SmallVector<std::pair<uint64_t, Type *>, 16> TypesVector;
+  append_range(TypesVector, Types);
+  sort(TypesVector,
+       [](const auto &A, const auto &B) { return A.first < B.first; });
 
-    Constant *In = Init->getAggregateElement(ElementIdx);
-    assert(In && "Couldn't get element of initializer?");
+  // Check that the types are non-overlapping.
+  uint64_t Offset = 0;
+  for (const auto &Pair : TypesVector) {
+    // Overlaps with previous type.
+    if (Pair.first < Offset)
+      return nullptr;
 
-    GlobalVariable *NGV = new GlobalVariable(
-        ElTy, false, GlobalVariable::InternalLinkage, In,
-        GV->getName() + "." + Twine(ElementIdx), GV->getThreadLocalMode(),
-        GV->getType()->getAddressSpace());
-    NGV->copyAttributesFrom(GV);
-    NewGlobals.insert(std::make_pair(ElementIdx, NGV));
-
-    if (StructType *STy = dyn_cast<StructType>(Ty)) {
-      const StructLayout &Layout = *DL.getStructLayout(STy);
-
-      // Calculate the known alignment of the field. If the original aggregate
-      // had 256 byte alignment for example, something might depend on that:
-      // propagate info to each field.
-      uint64_t FieldOffset = Layout.getElementOffset(ElementIdx);
-      Align NewAlign = commonAlignment(StartAlignment, FieldOffset);
-      if (NewAlign > DL.getABITypeAlign(STy->getElementType(ElementIdx)))
-        NGV->setAlignment(NewAlign);
-
-      // Copy over the debug info for the variable.
-      uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());
-      uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx);
-      transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, VarSize);
-    } else {
-      uint64_t EltSize = DL.getTypeAllocSize(ElTy);
-      Align EltAlign = DL.getABITypeAlign(ElTy);
-      uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);
-
-      // Calculate the known alignment of the field. If the original aggregate
-      // had 256 byte alignment for example, something might depend on that:
-      // propagate info to each field.
-      Align NewAlign = commonAlignment(StartAlignment, EltSize * ElementIdx);
-      if (NewAlign > EltAlign)
-        NGV->setAlignment(NewAlign);
-      transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx,
-                           FragmentSizeInBits, VarSize);
-    }
+    Offset = Pair.first + DL.getTypeAllocSize(Pair.second);
   }
 
-  if (NewGlobals.empty())
+  // Some accesses go beyond the end of the global, don't bother.
+  if (Offset > DL.getTypeAllocSize(GV->getValueType()))
     return nullptr;
 
-  Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
-  for (auto NewGlobalVar : NewGlobals)
-    Globals.push_back(NewGlobalVar.second);
+  // Collect initializers for new globals.
+  Constant *OrigInit = GV->getInitializer();
+  DenseMap<uint64_t, Constant *> Initializers;
+  for (const auto &Pair : Types) {
+    Constant *NewInit = ConstantFoldLoadFromConst(OrigInit, Pair.second,
+                                                  APInt(64, Pair.first), DL);
+    if (!NewInit) {
+      LLVM_DEBUG(dbgs() << "Global SRA: Failed to evaluate initializer of "
                        << *GV << " with type " << *Pair.second << " at offset "
                        << Pair.first << "\n");
+      return nullptr;
+    }
+    Initializers.insert({Pair.first, NewInit});
+  }
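As a worked example of the initializer split above (hypothetical, not from the patch): given

    @g = internal global { i32, i64 } { i32 7, i64 9 }

and the collected pairs {0 -> i32, 8 -> i64}, ConstantFoldLoadFromConst yields i32 7 at offset 0 and i64 9 at offset 8. If a pair cannot be evaluated from the initializer, the LLVM_DEBUG message fires and SRA is abandoned for the whole global.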
 
   LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
 
-  Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
+  // Get the alignment of the global, either explicit or target-specific.
+  Align StartAlignment =
+      DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
+  uint64_t VarSize = DL.getTypeSizeInBits(GV->getValueType());
+
+  // Create replacement globals.
+  DenseMap<uint64_t, GlobalVariable *> NewGlobals;
+  unsigned NameSuffix = 0;
+  for (auto &Pair : TypesVector) {
+    uint64_t Offset = Pair.first;
+    Type *Ty = Pair.second;
+    GlobalVariable *NGV = new GlobalVariable(
+        *GV->getParent(), Ty, false, GlobalVariable::InternalLinkage,
+        Initializers[Offset], GV->getName() + "." + Twine(NameSuffix++), GV,
+        GV->getThreadLocalMode(), GV->getAddressSpace());
+    NGV->copyAttributesFrom(GV);
+    NewGlobals.insert({Offset, NGV});
+
+    // Calculate the known alignment of the field. If the original aggregate
+    // had 256 byte alignment for example, something might depend on that:
+    // propagate info to each field.
+    Align NewAlign = commonAlignment(StartAlignment, Offset);
+    if (NewAlign > DL.getABITypeAlign(Ty))
+      NGV->setAlignment(NewAlign);
+
+    // Copy over the debug info for the variable.
+    transferSRADebugInfo(GV, NGV, Offset * 8, DL.getTypeAllocSizeInBits(Ty),
+                         VarSize);
+  }
 
-  // Loop over all of the uses of the global, replacing the constantexpr geps,
-  // with smaller constantexpr geps or direct references.
-  while (!GV->use_empty()) {
-    User *GEP = GV->user_back();
-    assert(((isa<ConstantExpr>(GEP) &&
-             cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
-            isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
-
-    // Ignore the 1th operand, which has to be zero or else the program is quite
-    // broken (undefined). Get the 2nd operand, which is the structure or array
-    // index.
-    unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
-    assert(NewGlobals.count(ElementIdx) == 1);
-
-    Value *NewPtr = NewGlobals[ElementIdx];
-    Type *NewTy = NewGlobals[ElementIdx]->getValueType();
-
-    // Form a shorter GEP if needed.
-    if (GEP->getNumOperands() > 3) {
-      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
-        SmallVector<Constant *, 8> Idxs;
-        Idxs.push_back(NullInt);
-        for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
-          Idxs.push_back(CE->getOperand(i));
-        NewPtr =
-            ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs);
-      } else {
-        GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
-        SmallVector<Value *, 8> Idxs;
-        Idxs.push_back(NullInt);
-        for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
-          Idxs.push_back(GEPI->getOperand(i));
-        NewPtr = GetElementPtrInst::Create(
-            NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(ElementIdx),
-            GEPI);
-      }
+  // Replace uses of the original global with uses of the new global.
+  SmallVector<Value *, 16> Worklist;
+  SmallPtrSet<Value *, 16> Visited;
+  SmallVector<WeakTrackingVH, 16> DeadInsts;
+  auto AppendUsers = [&](Value *V) {
+    for (User *U : V->users())
+      if (Visited.insert(U).second)
+        Worklist.push_back(U);
+  };
+  AppendUsers(GV);
+  while (!Worklist.empty()) {
+    Value *V = Worklist.pop_back_val();
+    if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
+        isa<GEPOperator>(V)) {
+      AppendUsers(V);
+      if (isa<Instruction>(V))
+        DeadInsts.push_back(V);
+      continue;
     }
-    GEP->replaceAllUsesWith(NewPtr);
-
-    // We changed the pointer of any memory access user. Recalculate alignments.
-    for (User *U : NewPtr->users()) {
-      if (auto *Load = dyn_cast<LoadInst>(U)) {
-        Align PrefAlign = DL.getPrefTypeAlign(Load->getType());
-        Align NewAlign = getOrEnforceKnownAlignment(Load->getPointerOperand(),
-                                                    PrefAlign, DL, Load);
-        Load->setAlignment(NewAlign);
-      }
-      if (auto *Store = dyn_cast<StoreInst>(U)) {
-        Align PrefAlign =
-            DL.getPrefTypeAlign(Store->getValueOperand()->getType());
-        Align NewAlign = getOrEnforceKnownAlignment(Store->getPointerOperand(),
-                                                    PrefAlign, DL, Store);
-        Store->setAlignment(NewAlign);
+
+    if (Value *Ptr = getLoadStorePointerOperand(V)) {
+      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
+                                                   /* AllowNonInbounds */ true);
+      assert(Ptr == GV && "Load/store must be from/to global");
+      GlobalVariable *NGV = NewGlobals[Offset.getZExtValue()];
+      assert(NGV && "Must have replacement global for this offset");
+
+      // Update the pointer operand and recalculate alignment.
+      Align PrefAlign = DL.getPrefTypeAlign(getLoadStoreType(V));
+      Align NewAlign =
+          getOrEnforceKnownAlignment(NGV, PrefAlign, DL, cast<Instruction>(V));
+
+      if (auto *LI = dyn_cast<LoadInst>(V)) {
+        LI->setOperand(0, NGV);
+        LI->setAlignment(NewAlign);
+      } else {
+        auto *SI = cast<StoreInst>(V);
+        SI->setOperand(1, NGV);
+        SI->setAlignment(NewAlign);
       }
+      continue;
     }
 
-    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
-      GEPI->eraseFromParent();
-    else
-      cast<ConstantExpr>(GEP)->destroyConstant();
+    assert(isa<Constant>(V) && isSafeToDestroyConstant(cast<Constant>(V)) &&
+           "Other users can only be dead constants");
   }
 
-  // Delete the old global, now that it is dead.
-  Globals.erase(GV);
+  // Delete old instructions and global.
+  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+  GV->removeDeadConstantUsers();
+  GV->eraseFromParent();
   ++NumSRA;
 
   assert(NewGlobals.size() > 0);
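Taken together, the rewritten SRAGlobal turns a module like the following (hypothetical example; the actual replacement names come from the NameSuffix counter, and alignments from getOrEnforceKnownAlignment above):

    @s = internal global { i32, i64 } zeroinitializer

    define i64 @f() {
      store i32 1, i32* getelementptr inbounds ({ i32, i64 }, { i32, i64 }* @s, i64 0, i32 0)
      %v = load i64, i64* getelementptr inbounds ({ i32, i64 }, { i32, i64 }* @s, i64 0, i32 1)
      ret i64 %v
    }

into roughly:

    @s.0 = internal global i32 0
    @s.1 = internal global i64 0

    define i64 @f() {
      store i32 1, i32* @s.0
      %v = load i64, i64* @s.1
      ret i64 %v
    }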
diff --git a/llvm/test/DebugInfo/Generic/global-sra-array.ll b/llvm/test/DebugInfo/Generic/global-sra-array.ll
--- a/llvm/test/DebugInfo/Generic/global-sra-array.ll
+++ b/llvm/test/DebugInfo/Generic/global-sra-array.ll
@@ -21,9 +21,9 @@
 ; This array is first split into two struct, which are then split into their
 ; elements, of which only .a survives.
 @array = internal global [2 x %struct.anon] zeroinitializer, align 16, !dbg !0
-; CHECK: @array.0.0 = internal unnamed_addr global i32 0, align 16, !dbg ![[EL0:.*]]
-; CHECK: @array.1.0 = internal unnamed_addr global i32 0, align 8, !dbg ![[EL1:.*]]
-;
+; CHECK: @array.0 = internal unnamed_addr global i32 0, align 16, !dbg ![[EL0:.*]]
+; CHECK: @array.1 = internal unnamed_addr global i32 0, align 8, !dbg ![[EL1:.*]]
+
 ; CHECK: ![[EL0]] = !DIGlobalVariableExpression(var: ![[VAR:.*]], expr: !DIExpression(DW_OP_LLVM_fragment, 0, 32))
 ; CHECK: ![[VAR]] = distinct !DIGlobalVariable(name: "array"
 ; CHECK: ![[EL1]] = !DIGlobalVariableExpression(var: ![[VAR]], expr: !DIExpression(DW_OP_LLVM_fragment, 64, 32))
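The fragment expressions checked above follow directly from the transferSRADebugInfo(GV, NGV, Offset * 8, ...) call in the new code: assuming %struct.anon is a pair of i32 fields, the .a field of the second array element lives at byte offset 8, i.e. bit offset 64, with a fragment width of 32 bits (the alloc size of i32). A sketch of the resulting IR, with hypothetical metadata ids !0 and !20:

    @array.1 = internal unnamed_addr global i32 0, align 8, !dbg !20

    !20 = !DIGlobalVariableExpression(var: !0, expr: !DIExpression(DW_OP_LLVM_fragment, 64, 32))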
diff --git a/llvm/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll b/llvm/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
--- a/llvm/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
+++ b/llvm/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
@@ -38,7 +38,7 @@
 define void @print() {
 ; CHECK-LABEL: @print(
-; CHECK-NEXT:    [[TMP1:%.*]] = load i8*, i8** @_ZL14buttonInitData.0.0, align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8*, i8** @_ZL14buttonInitData.0, align 16
 ; CHECK-NEXT:    call void @test(i8* [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/GlobalOpt/globalsra-align.ll b/llvm/test/Transforms/GlobalOpt/globalsra-align.ll
--- a/llvm/test/Transforms/GlobalOpt/globalsra-align.ll
+++ b/llvm/test/Transforms/GlobalOpt/globalsra-align.ll
@@ -14,16 +14,14 @@
 ; preferred alignment from the datalayout.
 ;.
-; CHECK: @[[A_1:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global [7 x i32*] zeroinitializer, align 16
-; CHECK: @[[A_2_0:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 8
-; CHECK: @[[A_2_1:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 8
-; CHECK: @[[A_2_2:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 8
-; CHECK: @[[A_2_3:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 8
+; CHECK: @[[A_4:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 8
+; CHECK: @[[A_5:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 16
+; CHECK: @[[A_6:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 16
+; CHECK: @[[A_7:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 16
 ;.
 define i32* @reduce_align_0() {
 ; CHECK-LABEL: @reduce_align_0(
-; CHECK-NEXT:    [[X:%.*]] = load i32*, i32** @a.2.0, align 8
-; CHECK-NEXT:    store i32* null, i32** getelementptr inbounds ([7 x i32*], [7 x i32*]* @a.1, i32 0, i64 0), align 16
+; CHECK-NEXT:    [[X:%.*]] = load i32*, i32** @a.4, align 8
 ; CHECK-NEXT:    ret i32* [[X]]
 ;
   %x = load i32*, i32** getelementptr inbounds ([3 x [7 x i32*]], [3 x [7 x i32*]]* @a, i64 0, i64 2, i64 0), align 8
@@ -33,8 +31,7 @@
 define i32* @reduce_align_1() {
 ; CHECK-LABEL: @reduce_align_1(
-; CHECK-NEXT:    [[X:%.*]] = load i32*, i32** @a.2.1, align 8
-; CHECK-NEXT:    store i32* null, i32** getelementptr inbounds ([7 x i32*], [7 x i32*]* @a.1, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[X:%.*]] = load i32*, i32** @a.5, align 16
 ; CHECK-NEXT:    ret i32* [[X]]
 ;
   %x = load i32*, i32** getelementptr inbounds ([3 x [7 x i32*]], [3 x [7 x i32*]]* @a, i64 0, i64 2, i64 1), align 4
@@ -44,8 +41,7 @@
 define i32* @reduce_align_2() {
 ; CHECK-LABEL: @reduce_align_2(
-; CHECK-NEXT:    [[X:%.*]] = load i32*, i32** @a.2.2, align 8
-; CHECK-NEXT:    store i32* null, i32** getelementptr inbounds ([7 x i32*], [7 x i32*]* @a.1, i32 0, i64 2), align 8
+; CHECK-NEXT:    [[X:%.*]] = load i32*, i32** @a.6, align 16
 ; CHECK-NEXT:    ret i32* [[X]]
 ;
   %x = load i32*, i32** getelementptr inbounds ([3 x [7 x i32*]], [3 x [7 x i32*]]* @a, i64 0, i64 2, i64 2), align 16
@@ -55,8 +51,7 @@
 define i32* @reduce_align_3() {
 ; CHECK-LABEL: @reduce_align_3(
-; CHECK-NEXT:    [[X:%.*]] = load i32*, i32** @a.2.3, align 8
-; CHECK-NEXT:    store i32* null, i32** getelementptr inbounds ([7 x i32*], [7 x i32*]* @a.1, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[X:%.*]] = load i32*, i32** @a.7, align 16
 ; CHECK-NEXT:    ret i32* [[X]]
 ;
   %x = load i32*, i32** getelementptr inbounds ([3 x [7 x i32*]], [3 x [7 x i32*]]* @a, i64 0, i64 2, i64 3), align 4
diff --git a/llvm/test/Transforms/GlobalOpt/globalsra-generic-type.ll b/llvm/test/Transforms/GlobalOpt/globalsra-generic-type.ll
--- a/llvm/test/Transforms/GlobalOpt/globalsra-generic-type.ll
+++ b/llvm/test/Transforms/GlobalOpt/globalsra-generic-type.ll
@@ -5,8 +5,6 @@
 define void @test() {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    store i32 1, i32* bitcast ([8 x i8]* @g to i32*), align 4
-; CHECK-NEXT:    store i32 2, i32* getelementptr (i32, i32* bitcast ([8 x i8]* @g to i32*), i64 1), align 4
 ; CHECK-NEXT:    ret void
 ;
   store i32 1, i32* bitcast ([8 x i8]* @g to i32*)
@@ -16,8 +14,7 @@
 define i32 @load1() {
 ; CHECK-LABEL: @load1(
-; CHECK-NEXT:    [[V:%.*]] = load i32, i32* bitcast ([8 x i8]* @g to i32*), align 4
-; CHECK-NEXT:    ret i32 [[V]]
+; CHECK-NEXT:    ret i32 1
 ;
   %v = load i32, i32* bitcast ([8 x i8]* @g to i32*)
   ret i32 %v
@@ -25,8 +22,7 @@
 define i32 @load2() {
 ; CHECK-LABEL: @load2(
-; CHECK-NEXT:    [[V:%.*]] = load i32, i32* getelementptr (i32, i32* bitcast ([8 x i8]* @g to i32*), i64 1), align 4
-; CHECK-NEXT:    ret i32 [[V]]
+; CHECK-NEXT:    ret i32 2
 ;
   %v = load i32, i32* getelementptr (i32, i32* bitcast ([8 x i8]* @g to i32*), i64 1)
   ret i32 %v
diff --git a/llvm/test/Transforms/GlobalOpt/globalsra-opaque-ptr.ll b/llvm/test/Transforms/GlobalOpt/globalsra-opaque-ptr.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/globalsra-opaque-ptr.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; RUN: opt -S -globalopt -opaque-pointers < %s | FileCheck %s
+
+; Global SRA should not be performed here (or at least not naively), as
+; offset 8 is accessed as both i32 and i64.
+
+%T = type { i32, i32, i32, i32 }
+@g = internal global %T zeroinitializer
+
+;.
+; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr global [[T:%.*]] zeroinitializer
+;.
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    store i32 1, ptr getelementptr inbounds ([[T:%.*]], ptr @g, i64 0, i32 1), align 4
+; CHECK-NEXT:    store i32 2, ptr getelementptr inbounds ([[T]], ptr @g, i64 0, i32 2), align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 1, ptr getelementptr (%T, ptr @g, i64 0, i32 1)
+  store i32 2, ptr getelementptr (%T, ptr @g, i64 0, i32 2)
+  ret void
+}
+
+define i32 @load1() {
+; CHECK-LABEL: @load1(
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr getelementptr inbounds ([[T:%.*]], ptr @g, i64 0, i32 1), align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %v = load i32, ptr getelementptr (%T, ptr @g, i64 0, i32 1)
+  ret i32 %v
+}
+
+define i64 @load2() {
+; CHECK-LABEL: @load2(
+; CHECK-NEXT:    [[V:%.*]] = load i64, ptr getelementptr inbounds ([[T:%.*]], ptr @g, i64 0, i32 2), align 4
+; CHECK-NEXT:    ret i64 [[V]]
+;
+  %v = load i64, ptr getelementptr (%T, ptr @g, i64 0, i32 2)
+  ret i64 %v
+}
diff --git a/llvm/test/Transforms/GlobalOpt/globalsra-recursive.ll b/llvm/test/Transforms/GlobalOpt/globalsra-recursive.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/globalsra-recursive.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -globalopt < %s | FileCheck %s
+
+; Make sure we don't recursively SRA if there are aggregate load/stores with
+; the same type as the global.
+
+@g = internal global { i32, i32 } undef
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    store { i32, i32 } zeroinitializer, { i32, i32 }* @g, align 4
+; CHECK-NEXT:    store { i32, i32 } { i32 0, i32 1 }, { i32, i32 }* @g, align 4
+; CHECK-NEXT:    ret void
+;
+  store { i32, i32 } zeroinitializer, { i32, i32 }* @g
+  store { i32, i32 } { i32 0, i32 1 }, { i32, i32 }* @g
+  ret void
+}
+
+define { i32, i32 } @load() {
+; CHECK-LABEL: @load(
+; CHECK-NEXT:    [[V:%.*]] = load { i32, i32 }, { i32, i32 }* @g, align 4
+; CHECK-NEXT:    ret { i32, i32 } [[V]]
+;
+  %v = load { i32, i32 }, { i32, i32 }* @g
+  ret { i32, i32 } %v
+}