Diff 400458

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Show First 20 Lines • Show All 331 Lines • ▼ Show 20 Lines	static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
}		}

Changed \|=		Changed \|=
RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts);		RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts);
GV->removeDeadConstantUsers();		GV->removeDeadConstantUsers();
return Changed;		return Changed;
}		}

static bool isSafeSROAElementUse(Value *V);		/// Look at all uses of the global and determine which (offset, type) pairs it
		/// can be split into.
/// Return true if the specified GEP is a safe user of a derived		static bool collectSRATypes(DenseMap<uint64_t, Type *> &Types, GlobalValue *GV,
/// expression from a global that we want to SROA.		const DataLayout &DL) {
static bool isSafeSROAGEP(User *U) {		SmallVector<Use *, 16> Worklist;
// Check to see if this ConstantExpr GEP is SRA'able. In particular, we		SmallPtrSet<Use *, 16> Visited;
// don't like < 3 operand CE's, and we don't like non-constant integer		auto AppendUses = [&](Value *V) {
// indices. This enforces that all uses are 'gep GV, 0, C, ...' for some		for (Use &U : V->uses())
// value of C.		if (Visited.insert(&U).second)
if (U->getNumOperands() < 3 \|\| !isa<Constant>(U->getOperand(1)) \|\|		Worklist.push_back(&U);
!cast<Constant>(U->getOperand(1))->isNullValue())		};
return false;		AppendUses(GV);
		while (!Worklist.empty()) {
gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);		Use *U = Worklist.pop_back_val();
++GEPI; // Skip over the pointer index.		User *V = U->getUser();
		if (isa<BitCastOperator>(V) \|\| isa<AddrSpaceCastOperator>(V)) {
// For all other level we require that the indices are constant and inrange.		AppendUses(V);
// In particular, consider: A[0][i]. We cannot know that the user isn't doing
// invalid things like allowing i to index an out-of-range subscript that
// accesses A[1]. This can also happen between different members of a struct
// in llvm IR.
for (; GEPI != E; ++GEPI) {
if (GEPI.isStruct())
continue;		continue;

ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
if (!IdxVal \|\| (GEPI.isBoundedSequential() &&
IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
return false;
}

return llvm::all_of(U->users(), isSafeSROAElementUse);
}		}

/// Return true if the specified instruction is a safe user of a derived		if (auto *GEP = dyn_cast<GEPOperator>(V)) {
/// expression from a global that we want to SROA.		if (!GEP->hasAllConstantIndices())
static bool isSafeSROAElementUse(Value *V) {		return false;
// We might have a dead and dangling constant hanging off of here.		AppendUses(V);
if (Constant *C = dyn_cast<Constant>(V))		continue;
return isSafeToDestroyConstant(C);

Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;

// Loads are ok.
if (isa<LoadInst>(I)) return true;

// Stores to the pointer are ok.
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getOperand(0) != V;

// Otherwise, it must be a GEP. Check it and its users are safe to SRA.
return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I);
}		}

/// Look at all uses of the global and decide whether it is safe for us to		if (Value *Ptr = getLoadStorePointerOperand(V)) {
/// perform this transformation.		// This is storing the global address into somewhere, not storing into
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {		// the global.
for (User *U : GV->users()) {		if (isa<StoreInst>(V) && U->getOperandNo() == 0)
// The user of the global must be a GEP Inst or a ConstantExpr GEP.
if (!isa<GetElementPtrInst>(U) &&
(!isa<ConstantExpr>(U) \|\|
cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
return false;		return false;

// Check the gep and it's users are safe to SRA		APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
if (!isSafeSROAGEP(U))		Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
		/* AllowNonInbounds */ true);
		if (Ptr != GV \|\| Offset.getActiveBits() >= 64)
return false;		return false;
}

return true;		// TODO: We currently require that all accesses at a given offset must
		// use the same type. This could be relaxed.
		Type *Ty = getLoadStoreType(V);
		auto It = Types.try_emplace(Offset.getZExtValue(), Ty).first;
		if (Ty != It->second)
		return false;
		continue;
}		}

static bool IsSRASequential(Type *T) {		// Ignore dead constant users.
return isa<ArrayType>(T) \|\| isa<VectorType>(T);		if (auto *C = dyn_cast<Constant>(V)) {
}		if (!isSafeToDestroyConstant(C))
static uint64_t GetSRASequentialNumElements(Type *T) {		return false;
if (ArrayType *AT = dyn_cast<ArrayType>(T))		continue;
return AT->getNumElements();
return cast<FixedVectorType>(T)->getNumElements();
}
static Type *GetSRASequentialElementType(Type *T) {
if (ArrayType *AT = dyn_cast<ArrayType>(T))
return AT->getElementType();
return cast<VectorType>(T)->getElementType();
}		}
static bool CanDoGlobalSRA(GlobalVariable *GV) {
Constant *Init = GV->getInitializer();

if (isa<StructType>(Init->getType())) {		// Unknown user.
// nothing to check
} else if (IsSRASequential(Init->getType())) {
if (GetSRASequentialNumElements(Init->getType()) > 16 &&
GV->hasNUsesOrMore(16))
return false; // It's not worth it.
} else
return false;		return false;
		}

return GlobalUsersSafeToSRA(GV);		return true;
}		}

/// Copy over the debug info for a variable to its SRA replacements.		/// Copy over the debug info for a variable to its SRA replacements.
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,		static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
uint64_t FragmentOffsetInBits,		uint64_t FragmentOffsetInBits,
uint64_t FragmentSizeInBits,		uint64_t FragmentSizeInBits,
uint64_t VarSize) {		uint64_t VarSize) {
SmallVector<DIGlobalVariableExpression *, 1> GVs;		SmallVector<DIGlobalVariableExpression *, 1> GVs;
Show All 16 Lines
}		}

/// Perform scalar replacement of aggregates on the specified global variable.		/// Perform scalar replacement of aggregates on the specified global variable.
/// This opens the door for other optimizations by exposing the behavior of the		/// This opens the door for other optimizations by exposing the behavior of the
/// program in a more fine-grained way. We have determined that this		/// program in a more fine-grained way. We have determined that this
/// transformation is safe already. We return the first global variable we		/// transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.		/// insert so that the caller can reprocess it.
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {		static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// Make sure this global only has simple uses that we can SRA.		assert(GV->hasLocalLinkage());
if (!CanDoGlobalSRA(GV))
		// Collect types to split into.
		DenseMap<uint64_t, Type *> Types;
		if (!collectSRATypes(Types, GV, DL) \|\| Types.empty())
return nullptr;		return nullptr;

assert(GV->hasLocalLinkage());		// Make sure we don't SRA back to the same type.
Constant *Init = GV->getInitializer();		if (Types.size() == 1 && Types.begin()->second == GV->getValueType())
Type *Ty = Init->getType();		return nullptr;
uint64_t VarSize = DL.getTypeSizeInBits(Ty);

std::map<unsigned, GlobalVariable *> NewGlobals;		// Don't perform SRA if we would have to split into many globals.
		if (Types.size() > 16)
		[Inline review comment by fhahn — unsubmitted, not done] I think this limit is more aggressive for struct types than the original code, as far as I can tell, which seems to be causing some code-size regressions. I put up D129525 to make the limit behave more like the original code.
		return nullptr;

// Get the alignment of the global, either explicit or target-specific.		// Sort by offset.
Align StartAlignment =		SmallVector<std::pair<uint64_t, Type *>, 16> TypesVector;
DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getType());		append_range(TypesVector, Types);
		sort(TypesVector,
		[](const auto &A, const auto &B) { return A.first < B.first; });

		// Check that the types are non-overlapping.
		uint64_t Offset = 0;
		for (const auto &Pair : TypesVector) {
		// Overlaps with previous type.
		if (Pair.first < Offset)
		return nullptr;

// Loop over all users and create replacement variables for used aggregate		Offset = Pair.first + DL.getTypeAllocSize(Pair.second);
// elements.		}
for (User *GEP : GV->users()) {
assert(((isa<ConstantExpr>(GEP) && cast<ConstantExpr>(GEP)->getOpcode() ==		// Some accesses go beyond the end of the global, don't bother.
Instruction::GetElementPtr) \|\|		if (Offset > DL.getTypeAllocSize(GV->getValueType()))
isa<GetElementPtrInst>(GEP)) &&		return nullptr;
"NonGEP CE's are not SRAable!");

// Ignore the 1th operand, which has to be zero or else the program is quite
// broken (undefined). Get the 2nd operand, which is the structure or array
// index.
unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
if (NewGlobals.count(ElementIdx) == 1)
continue; // we`ve already created replacement variable
assert(NewGlobals.count(ElementIdx) == 0);

Type *ElTy = nullptr;
if (StructType *STy = dyn_cast<StructType>(Ty))
ElTy = STy->getElementType(ElementIdx);
else
ElTy = GetSRASequentialElementType(Ty);
assert(ElTy);

Constant *In = Init->getAggregateElement(ElementIdx);		// Collect initializers for new globals.
assert(In && "Couldn't get element of initializer?");		Constant *OrigInit = GV->getInitializer();
		DenseMap<uint64_t, Constant *> Initializers;
		for (const auto &Pair : Types) {
		Constant *NewInit = ConstantFoldLoadFromConst(OrigInit, Pair.second,
		APInt(64, Pair.first), DL);
		if (!NewInit) {
		LLVM_DEBUG(dbgs() << "Global SRA: Failed to evaluate initializer of "
		<< GV << " with type " << Pair.second << " at offset "
		<< Pair.first << "\n");
		return nullptr;
		}
		Initializers.insert({Pair.first, NewInit});
		}

		LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");

		// Get the alignment of the global, either explicit or target-specific.
		Align StartAlignment =
		DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
		uint64_t VarSize = DL.getTypeSizeInBits(GV->getValueType());

		// Create replacement globals.
		DenseMap<uint64_t, GlobalVariable *> NewGlobals;
		unsigned NameSuffix = 0;
		for (auto &Pair : TypesVector) {
		uint64_t Offset = Pair.first;
		Type *Ty = Pair.second;
GlobalVariable *NGV = new GlobalVariable(		GlobalVariable *NGV = new GlobalVariable(
ElTy, false, GlobalVariable::InternalLinkage, In,		*GV->getParent(), Ty, false, GlobalVariable::InternalLinkage,
GV->getName() + "." + Twine(ElementIdx), GV->getThreadLocalMode(),		Initializers[Offset], GV->getName() + "." + Twine(NameSuffix++), GV,
GV->getType()->getAddressSpace());		GV->getThreadLocalMode(), GV->getAddressSpace());
NGV->copyAttributesFrom(GV);		NGV->copyAttributesFrom(GV);
NewGlobals.insert(std::make_pair(ElementIdx, NGV));		NewGlobals.insert({Offset, NGV});

if (StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout &Layout = *DL.getStructLayout(STy);

// Calculate the known alignment of the field. If the original aggregate		// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:		// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.		// propagate info to each field.
uint64_t FieldOffset = Layout.getElementOffset(ElementIdx);		Align NewAlign = commonAlignment(StartAlignment, Offset);
Align NewAlign = commonAlignment(StartAlignment, FieldOffset);		if (NewAlign > DL.getABITypeAlign(Ty))
if (NewAlign > DL.getABITypeAlign(STy->getElementType(ElementIdx)))
NGV->setAlignment(NewAlign);		NGV->setAlignment(NewAlign);

// Copy over the debug info for the variable.		// Copy over the debug info for the variable.
uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());		transferSRADebugInfo(GV, NGV, Offset * 8, DL.getTypeAllocSizeInBits(Ty),
uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx);		VarSize);
transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, VarSize);
} else {
uint64_t EltSize = DL.getTypeAllocSize(ElTy);
Align EltAlign = DL.getABITypeAlign(ElTy);
uint64_t FragmentSizeInBits = DL.getTypeAllocSizeInBits(ElTy);

// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
Align NewAlign = commonAlignment(StartAlignment, EltSize * ElementIdx);
if (NewAlign > EltAlign)
NGV->setAlignment(NewAlign);
transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx,
FragmentSizeInBits, VarSize);
}
}		}

if (NewGlobals.empty())		// Replace uses of the original global with uses of the new global.
return nullptr;		SmallVector<Value *, 16> Worklist;
		SmallPtrSet<Value *, 16> Visited;
Module::GlobalListType &Globals = GV->getParent()->getGlobalList();		SmallVector<WeakTrackingVH, 16> DeadInsts;
for (auto NewGlobalVar : NewGlobals)		auto AppendUsers = [&](Value *V) {
Globals.push_back(NewGlobalVar.second);		for (User *U : V->users())
		if (Visited.insert(U).second)
LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");		Worklist.push_back(U);
		};
Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));		AppendUsers(GV);
		while (!Worklist.empty()) {
		Value *V = Worklist.pop_back_val();
		if (isa<BitCastOperator>(V) \|\| isa<AddrSpaceCastOperator>(V) \|\|
		isa<GEPOperator>(V)) {
		AppendUsers(V);
		if (isa<Instruction>(V))
		DeadInsts.push_back(V);
		continue;
		}

// Loop over all of the uses of the global, replacing the constantexpr geps,		if (Value *Ptr = getLoadStorePointerOperand(V)) {
// with smaller constantexpr geps or direct references.		APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
while (!GV->use_empty()) {		Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
User *GEP = GV->user_back();		/* AllowNonInbounds */ true);
assert(((isa<ConstantExpr>(GEP) &&		assert(Ptr == GV && "Load/store must be from/to global");
cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)\|\|		GlobalVariable *NGV = NewGlobals[Offset.getZExtValue()];
isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");		assert(NGV && "Must have replacement global for this offset");

// Ignore the 1th operand, which has to be zero or else the program is quite		// Update the pointer operand and recalculate alignment.
// broken (undefined). Get the 2nd operand, which is the structure or array		Align PrefAlign = DL.getPrefTypeAlign(getLoadStoreType(V));
// index.		Align NewAlign =
unsigned ElementIdx = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();		getOrEnforceKnownAlignment(NGV, PrefAlign, DL, cast<Instruction>(V));
assert(NewGlobals.count(ElementIdx) == 1);
		if (auto *LI = dyn_cast<LoadInst>(V)) {
Value *NewPtr = NewGlobals[ElementIdx];		LI->setOperand(0, NGV);
Type *NewTy = NewGlobals[ElementIdx]->getValueType();		LI->setAlignment(NewAlign);

// Form a shorter GEP if needed.
if (GEP->getNumOperands() > 3) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
SmallVector<Constant*, 8> Idxs;
Idxs.push_back(NullInt);
for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
Idxs.push_back(CE->getOperand(i));
NewPtr =
ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs);
} else {		} else {
GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);		auto *SI = cast<StoreInst>(V);
SmallVector<Value*, 8> Idxs;		SI->setOperand(1, NGV);
Idxs.push_back(NullInt);		SI->setAlignment(NewAlign);
for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
Idxs.push_back(GEPI->getOperand(i));
NewPtr = GetElementPtrInst::Create(
NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(ElementIdx),
GEPI);
}
}
GEP->replaceAllUsesWith(NewPtr);

// We changed the pointer of any memory access user. Recalculate alignments.
for (User *U : NewPtr->users()) {
if (auto *Load = dyn_cast<LoadInst>(U)) {
Align PrefAlign = DL.getPrefTypeAlign(Load->getType());
Align NewAlign = getOrEnforceKnownAlignment(Load->getPointerOperand(),
PrefAlign, DL, Load);
Load->setAlignment(NewAlign);
}
if (auto *Store = dyn_cast<StoreInst>(U)) {
Align PrefAlign =
DL.getPrefTypeAlign(Store->getValueOperand()->getType());
Align NewAlign = getOrEnforceKnownAlignment(Store->getPointerOperand(),
PrefAlign, DL, Store);
Store->setAlignment(NewAlign);
}		}
		continue;
}		}

if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))		assert(isa<Constant>(V) && isSafeToDestroyConstant(cast<Constant>(V)) &&
GEPI->eraseFromParent();		"Other users can only be dead constants");
else
cast<ConstantExpr>(GEP)->destroyConstant();
}		}

// Delete the old global, now that it is dead.		// Delete old instructions and global.
Globals.erase(GV);		RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
		GV->removeDeadConstantUsers();
		GV->eraseFromParent();
++NumSRA;		++NumSRA;

assert(NewGlobals.size() > 0);		assert(NewGlobals.size() > 0);
return NewGlobals.begin()->second;		return NewGlobals.begin()->second;
}		}

/// Return true if all users of the specified value will trap if the value is		/// Return true if all users of the specified value will trap if the value is
/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid		/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid
▲ Show 20 Lines • Show All 2,017 Lines • Show Last 20 Lines

llvm/test/DebugInfo/Generic/global-sra-array.ll

	Show All 15 Lines
	; return array[0].a + array[1].a;			; return array[0].a + array[1].a;
	; }			; }

	%struct.anon = type { i32, i8 }			%struct.anon = type { i32, i8 }

	; This array is first split into two struct, which are then split into their			; This array is first split into two struct, which are then split into their
	; elements, of which only .a survives.			; elements, of which only .a survives.
	@array = internal global [2 x %struct.anon] zeroinitializer, align 16, !dbg !0			@array = internal global [2 x %struct.anon] zeroinitializer, align 16, !dbg !0
	; CHECK: @array.0.0 = internal unnamed_addr global i32 0, align 16, !dbg ![[EL0:.*]]			; CHECK: @array.0 = internal unnamed_addr global i32 0, align 16, !dbg ![[EL0:.*]]
	; CHECK: @array.1.0 = internal unnamed_addr global i32 0, align 8, !dbg ![[EL1:.*]]			; CHECK: @array.1 = internal unnamed_addr global i32 0, align 8, !dbg ![[EL1:.*]]
	;
	; CHECK: ![[EL0]] = !DIGlobalVariableExpression(var: ![[VAR:.*]], expr: !DIExpression(DW_OP_LLVM_fragment, 0, 32))			; CHECK: ![[EL0]] = !DIGlobalVariableExpression(var: ![[VAR:.*]], expr: !DIExpression(DW_OP_LLVM_fragment, 0, 32))
	; CHECK: ![[VAR]] = distinct !DIGlobalVariable(name: "array"			; CHECK: ![[VAR]] = distinct !DIGlobalVariable(name: "array"
	; CHECK: ![[EL1]] = !DIGlobalVariableExpression(var: ![[VAR]], expr: !DIExpression(DW_OP_LLVM_fragment, 64, 32))			; CHECK: ![[EL1]] = !DIGlobalVariableExpression(var: ![[VAR]], expr: !DIExpression(DW_OP_LLVM_fragment, 64, 32))


	; Function Attrs: nounwind optsize ssp uwtable			; Function Attrs: nounwind optsize ssp uwtable
	define void @foo(i32 %in) #0 {			define void @foo(i32 %in) #0 {
	entry:			entry:
	▲ Show 20 Lines • Show All 93 Lines • Show Last 20 Lines

llvm/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll

	Show All 32 Lines
	}			}

	declare void @test(i8*)			declare void @test(i8*)

	; The preferred alignment is available.			; The preferred alignment is available.

	define void @print() {			define void @print() {
	; CHECK-LABEL: @print(			; CHECK-LABEL: @print(
	; CHECK-NEXT: [[TMP1:%.]] = load i8, i8** @_ZL14buttonInitData.0.0, align 16			; CHECK-NEXT: [[TMP1:%.]] = load i8, i8** @_ZL14buttonInitData.0, align 16
	; CHECK-NEXT: call void @test(i8* [[TMP1]])			; CHECK-NEXT: call void @test(i8* [[TMP1]])
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	%1 = load i8, i8* getelementptr inbounds ([1 x %struct.ButtonInitData], [1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4			%1 = load i8, i8* getelementptr inbounds ([1 x %struct.ButtonInitData], [1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
	call void @test(i8* %1)			call void @test(i8* %1)
	ret void			ret void
	}			}

	!2009 = !{}			!2009 = !{}

llvm/test/Transforms/GlobalOpt/globalsra-align.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
	; RUN: opt < %s -passes=globalopt -S \| FileCheck %s			; RUN: opt < %s -passes=globalopt -S \| FileCheck %s

	target datalayout = "p:16:32:64" ; 16-bit pointers with 32-bit ABI alignment and 64-bit preferred alignmentt			target datalayout = "p:16:32:64" ; 16-bit pointers with 32-bit ABI alignment and 64-bit preferred alignmentt

	@a = internal externally_initialized global [3 x [7 x i32*]] zeroinitializer, align 16			@a = internal externally_initialized global [3 x [7 x i32*]] zeroinitializer, align 16

	; PR50253			; PR50253
	; The alignments are correct initially, but they should be updated			; The alignments are correct initially, but they should be updated
	; after transforming the global. The stored global pointer array retains			; after transforming the global. The stored global pointer array retains
	; its original "align 16", so access to element N into the new array			; its original "align 16", so access to element N into the new array
	; should be offset by the ABI alignment of N pointers.			; should be offset by the ABI alignment of N pointers.
	; Loaded globals are split into individual pointers and use the			; Loaded globals are split into individual pointers and use the
	; preferred alignment from the datalayout.			; preferred alignment from the datalayout.

	;.			;.
	; CHECK: @[[A_1:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global [7 x i32*] zeroinitializer, align 16			; CHECK: @[[A_4:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 8
	; CHECK: @[[A_2_0:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 8			; CHECK: @[[A_5:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 16
	; CHECK: @[[A_2_1:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 8			; CHECK: @[[A_6:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 16
	; CHECK: @[[A_2_2:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 8			; CHECK: @[[A_7:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 16
	; CHECK: @[[A_2_3:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global i32* null, align 8
	;.			;.
	define i32* @reduce_align_0() {			define i32* @reduce_align_0() {
	; CHECK-LABEL: @reduce_align_0(			; CHECK-LABEL: @reduce_align_0(
	; CHECK-NEXT: [[X:%.]] = load i32, i32** @a.2.0, align 8			; CHECK-NEXT: [[X:%.]] = load i32, i32** @a.4, align 8
	; CHECK-NEXT: store i32* null, i32** getelementptr inbounds ([7 x i32], [7 x i32]* @a.1, i32 0, i64 0), align 16
	; CHECK-NEXT: ret i32* [[X]]			; CHECK-NEXT: ret i32* [[X]]
	;			;
	%x = load i32, i32* getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 2, i64 0), align 8			%x = load i32, i32* getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 2, i64 0), align 8
	store i32* null, i32** getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 1, i64 0), align 4			store i32* null, i32** getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 1, i64 0), align 4
	ret i32* %x			ret i32* %x
	}			}

	define i32* @reduce_align_1() {			define i32* @reduce_align_1() {
	; CHECK-LABEL: @reduce_align_1(			; CHECK-LABEL: @reduce_align_1(
	; CHECK-NEXT: [[X:%.]] = load i32, i32** @a.2.1, align 8			; CHECK-NEXT: [[X:%.]] = load i32, i32** @a.5, align 16
	; CHECK-NEXT: store i32* null, i32** getelementptr inbounds ([7 x i32], [7 x i32]* @a.1, i32 0, i64 1), align 4
	; CHECK-NEXT: ret i32* [[X]]			; CHECK-NEXT: ret i32* [[X]]
	;			;
	%x = load i32, i32* getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 2, i64 1), align 4			%x = load i32, i32* getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 2, i64 1), align 4
	store i32* null, i32** getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 1, i64 1), align 16			store i32* null, i32** getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 1, i64 1), align 16
	ret i32* %x			ret i32* %x
	}			}

	define i32* @reduce_align_2() {			define i32* @reduce_align_2() {
	; CHECK-LABEL: @reduce_align_2(			; CHECK-LABEL: @reduce_align_2(
	; CHECK-NEXT: [[X:%.]] = load i32, i32** @a.2.2, align 8			; CHECK-NEXT: [[X:%.]] = load i32, i32** @a.6, align 16
	; CHECK-NEXT: store i32* null, i32** getelementptr inbounds ([7 x i32], [7 x i32]* @a.1, i32 0, i64 2), align 8
	; CHECK-NEXT: ret i32* [[X]]			; CHECK-NEXT: ret i32* [[X]]
	;			;
	%x = load i32, i32* getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 2, i64 2), align 16			%x = load i32, i32* getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 2, i64 2), align 16
	store i32* null, i32** getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 1, i64 2), align 4			store i32* null, i32** getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 1, i64 2), align 4
	ret i32* %x			ret i32* %x
	}			}

	define i32* @reduce_align_3() {			define i32* @reduce_align_3() {
	; CHECK-LABEL: @reduce_align_3(			; CHECK-LABEL: @reduce_align_3(
	; CHECK-NEXT: [[X:%.]] = load i32, i32** @a.2.3, align 8			; CHECK-NEXT: [[X:%.]] = load i32, i32** @a.7, align 16
	; CHECK-NEXT: store i32* null, i32** getelementptr inbounds ([7 x i32], [7 x i32]* @a.1, i32 0, i64 3), align 4
	; CHECK-NEXT: ret i32* [[X]]			; CHECK-NEXT: ret i32* [[X]]
	;			;
	%x = load i32, i32* getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 2, i64 3), align 4			%x = load i32, i32* getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 2, i64 3), align 4
	store i32* null, i32** getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 1, i64 3), align 8			store i32* null, i32** getelementptr inbounds ([3 x [7 x i32]], [3 x [7 x i32]]* @a, i64 0, i64 1, i64 3), align 8
	ret i32* %x			ret i32* %x
	}			}

llvm/test/Transforms/GlobalOpt/globalsra-generic-type.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -S -globalopt < %s \| FileCheck %s			; RUN: opt -S -globalopt < %s \| FileCheck %s

	@g = internal global [8 x i8] undef			@g = internal global [8 x i8] undef

	define void @test() {			define void @test() {
	; CHECK-LABEL: @test(			; CHECK-LABEL: @test(
	; CHECK-NEXT: store i32 1, i32* bitcast ([8 x i8]* @g to i32*), align 4
	; CHECK-NEXT: store i32 2, i32* getelementptr (i32, i32* bitcast ([8 x i8]* @g to i32*), i64 1), align 4
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	store i32 1, i32* bitcast ([8 x i8]* @g to i32*)			store i32 1, i32* bitcast ([8 x i8]* @g to i32*)
	store i32 2, i32* getelementptr (i32, i32* bitcast ([8 x i8]* @g to i32*), i64 1)			store i32 2, i32* getelementptr (i32, i32* bitcast ([8 x i8]* @g to i32*), i64 1)
	ret void			ret void
	}			}

	define i32 @load1() {			define i32 @load1() {
	; CHECK-LABEL: @load1(			; CHECK-LABEL: @load1(
	; CHECK-NEXT: [[V:%.]] = load i32, i32 bitcast ([8 x i8]* @g to i32*), align 4			; CHECK-NEXT: ret i32 1
	; CHECK-NEXT: ret i32 [[V]]
	;			;
	%v = load i32, i32* bitcast ([8 x i8]* @g to i32*)			%v = load i32, i32* bitcast ([8 x i8]* @g to i32*)
	ret i32 %v			ret i32 %v
	}			}

	define i32 @load2() {			define i32 @load2() {
	; CHECK-LABEL: @load2(			; CHECK-LABEL: @load2(
	; CHECK-NEXT: [[V:%.]] = load i32, i32 getelementptr (i32, i32* bitcast ([8 x i8]* @g to i32*), i64 1), align 4			; CHECK-NEXT: ret i32 2
	; CHECK-NEXT: ret i32 [[V]]
	;			;
	%v = load i32, i32* getelementptr (i32, i32* bitcast ([8 x i8]* @g to i32*), i64 1)			%v = load i32, i32* getelementptr (i32, i32* bitcast ([8 x i8]* @g to i32*), i64 1)
	ret i32 %v			ret i32 %v
	}			}

llvm/test/Transforms/GlobalOpt/globalsra-opaque-ptr.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
				; RUN: opt -S -globalopt -opaque-pointers < %s \| FileCheck %s

				; Global SRA should not be performed here (or at least not naively), as
				; offset 4 is accessed as both i32 and i64.

				%T = type { i32, i32, i32, i32 }
				@g = internal global %T zeroinitializer

				;.
				; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr global [[T:%.*]] zeroinitializer
				;.
				define void @test1() {
				; CHECK-LABEL: @test1(
				; CHECK-NEXT: store i32 1, ptr getelementptr inbounds ([[T:%.*]], ptr @g, i64 0, i32 1), align 4
				; CHECK-NEXT: store i32 2, ptr getelementptr inbounds ([[T]], ptr @g, i64 0, i32 2), align 4
				; CHECK-NEXT: ret void
				;
				store i32 1, ptr getelementptr (%T, ptr @g, i64 0, i32 1)
				store i32 2, ptr getelementptr (%T, ptr @g, i64 0, i32 2)
				ret void
				}

				define i32 @load1() {
				; CHECK-LABEL: @load1(
				; CHECK-NEXT: [[V:%.]] = load i32, ptr getelementptr inbounds ([[T:%.]], ptr @g, i64 0, i32 1), align 4
				; CHECK-NEXT: ret i32 [[V]]
				;
				%v = load i32, ptr getelementptr (%T, ptr @g, i64 0, i32 1)
				ret i32 %v
				}

				define i64 @load2() {
				; CHECK-LABEL: @load2(
				; CHECK-NEXT: [[V:%.]] = load i64, ptr getelementptr inbounds ([[T:%.]], ptr @g, i64 0, i32 2), align 4
				; CHECK-NEXT: ret i64 [[V]]
				;
				%v = load i64, ptr getelementptr (%T, ptr @g, i64 0, i32 2)
				ret i64 %v
				}

llvm/test/Transforms/GlobalOpt/globalsra-recursive.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
				; RUN: opt -S -globalopt < %s \| FileCheck %s

				; Make sure we don't recursively SRA if there are aggregate load/stores with
				; the same type as the global.

				@g = internal global { i32, i32 } undef

				define void @test() {
				; CHECK-LABEL: @test(
				; CHECK-NEXT: store { i32, i32 } zeroinitializer, { i32, i32 }* @g, align 4
				; CHECK-NEXT: store { i32, i32 } { i32 0, i32 1 }, { i32, i32 }* @g, align 4
				; CHECK-NEXT: ret void
				;
				store { i32, i32 } zeroinitializer, { i32, i32 }* @g
				store { i32, i32 } { i32 0, i32 1 }, { i32, i32 }* @g
				ret void
				}

				define { i32, i32 } @load() {
				; CHECK-LABEL: @load(
				; CHECK-NEXT: [[V:%.]] = load { i32, i32 }, { i32, i32 } @g, align 4
				; CHECK-NEXT: ret { i32, i32 } [[V]]
				;
				%v = load { i32, i32 }, { i32, i32 }* @g
				ret { i32, i32 } %v
				}

This is an archive of the discontinued LLVM Phabricator instance.

[GlobalOpt] Make global SRA offset based
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 400458

llvm/lib/Transforms/IPO/GlobalOpt.cpp

llvm/test/DebugInfo/Generic/global-sra-array.ll

llvm/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll

llvm/test/Transforms/GlobalOpt/globalsra-align.ll

llvm/test/Transforms/GlobalOpt/globalsra-generic-type.ll

llvm/test/Transforms/GlobalOpt/globalsra-opaque-ptr.ll

llvm/test/Transforms/GlobalOpt/globalsra-recursive.ll

This is an archive of the discontinued LLVM Phabricator instance.

[GlobalOpt] Make global SRA offset basedClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 400458

llvm/lib/Transforms/IPO/GlobalOpt.cpp

llvm/test/DebugInfo/Generic/global-sra-array.ll

llvm/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll

llvm/test/Transforms/GlobalOpt/globalsra-align.ll

llvm/test/Transforms/GlobalOpt/globalsra-generic-type.ll

llvm/test/Transforms/GlobalOpt/globalsra-opaque-ptr.ll

llvm/test/Transforms/GlobalOpt/globalsra-recursive.ll

[GlobalOpt] Make global SRA offset based
ClosedPublic