MemCpyOpt currently folds preceding undef stores when creating a MemSetRange, e.g. [undef, undef, undef, 0]. However, trailing undef stores end the range, so cases like [0, undef, 0, undef, 0, undef, 0, undef, 0, undef] do not get folded. This patch addresses that case.
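For illustration, here is a minimal IR sketch of that second shape (function and value names are made up, not taken from the patch's tests):

```llvm
define void @alternating(ptr %p) {
  ; [0, undef, 0, undef, 0] over five consecutive bytes
  store i8 0, ptr %p
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  store i8 undef, ptr %p1
  %p2 = getelementptr inbounds i8, ptr %p, i64 2
  store i8 0, ptr %p2
  %p3 = getelementptr inbounds i8, ptr %p, i64 3
  store i8 undef, ptr %p3
  %p4 = getelementptr inbounds i8, ptr %p, i64 4
  store i8 0, ptr %p4
  ret void
}
```

With trailing undef stores allowed inside the range, all five stores can fold into a single `call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 5, i1 false)`; overwriting the undef bytes with 0 merely refines them, so the transform is sound.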
I think there are two problems here:
- Profitability: As implemented, I believe this will also transform some non-profitable cases. For example, imagine a store 0, followed by one hundred store undefs, followed by another store 0: we'd rather emit those two stores than one long memset (see the sketch after this list). This needs some kind of profitability heuristic.
- Phase ordering: InstCombine will remove store undef, and it runs before MemCpyOpt. So MemCpyOpt will actually never see this kind of IR (except in degenerate cases) and this change will not make any difference in end-to-end compilation. What one could do here is to allow "holes" in the memset range if the memory is known to be uninitialized beforehand, though that wouldn't apply in your example.
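To make the unprofitable shape from the first point concrete, a sketch (offsets and the store count are illustrative):

```llvm
define void @sparse(ptr %p) {
  store i8 0, ptr %p
  ; imagine ~100 consecutive undef stores at offsets 1..100 (only two shown)
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  store i8 undef, ptr %p1
  %p2 = getelementptr inbounds i8, ptr %p, i64 2
  store i8 undef, ptr %p2
  %p101 = getelementptr inbounds i8, ptr %p, i64 101
  store i8 0, ptr %p101
  ret void
}
```

Folding everything here would produce a 102-byte memset, while the program only needs two scalar i8 stores, which is why some profitability check is needed.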
I've updated the isProfitable function to take into account the number of *non-undef* stores rather than the total number of stores.
> Phase ordering: InstCombine will remove store undef, and it runs before MemCpyOpt. So MemCpyOpt will actually never see this kind of IR (except in degenerate cases) and this change will not make any difference in end-to-end compilation. What one could do here is to allow "holes" in the memset range if the memory is known to be uninitialized beforehand, though that wouldn't apply in your example.
The motivation for this is https://github.com/rust-lang/rust/issues/104290. Experimentally adding a MemCpyOpt run between an early SROA pass and InstCombine does fix that case, but it causes a regression in instcombine-sroa-inttoptr.ll. I'm also not sure whether it's appropriate to run this pass that early.
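For reference, the experiment described above corresponds roughly to a pipeline like the following (an approximation via opt; the actual insertion point in the default pipeline would differ):

```
opt -passes='sroa,memcpyopt,instcombine' -S input.ll
```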