This is an archive of the discontinued LLVM Phabricator instance.

[SLP] Allow overlapping vector accesses (WIP).
Needs ReviewPublic

Authored by fhahn on Aug 18 2021, 12:45 PM.

Download Raw Diff

Details

Reviewers

spatel
dtemirbulatov
anton-afanasyev
ABataev

Summary

NOTE: This is an extremely rough draft intended to start a wider discussion on how to allow overlapping memory accesses.

I would like to extend the SLP vectorizer to support overlapping vector
loads. This allows vectorizing cases where we operate on overlapping
vectors that can be loaded efficiently.

The simplest C example is something like the snippet below, where we add
<s[0], s[1], s[2], s[3]> and <s[1], s[2], s[3], s[4]>. Those vectors can
be directly loaded from &s[0] and &s[1]. The problem is that currently
overlapping bundles are not allowed, which leads to gathering the second
vector, which is not profitable on AArch64.

// Illustrative kernel: s is advanced on every iteration, so relative to the
// pointer passed in, iteration x computes d[x] = s[x] + s[x + 1] for x in
// [0, 4). The two addends therefore come from the overlapping element ranges
// s[0..3] and s[1..4].
void test(int *s,int* __restrict__ d) {
    for (int x=0;x<4;x++,s++) {
        d[x] = s[0] + s[1];
    }
}

The invariant that bundles should not overlap seems to be relied on and
encoded in multiple places. In this patch, I mostly tried to disable
various checks and assertions. It effectively allows overlapping
bundles, iff their first entry in Scalars is unique.

This clearly is not a proper solution, but I am hoping that sharing the
patch can be the start of a discussion on how to properly address the
limitations. It would be great if you could share your thoughts.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

fhahn created this revision.Aug 18 2021, 12:45 PM

Herald added subscribers: hiraditya, kristof.beyls. · View Herald TranscriptAug 18 2021, 12:45 PM

fhahn requested review of this revision.Aug 18 2021, 12:45 PM

Herald added a project: Restricted Project. · View Herald TranscriptAug 18 2021, 12:45 PM

Harbormaster completed remote builds in B120194: Diff 367306.Aug 18 2021, 12:46 PM

vporpo added a subscriber: vporpo.Nov 11 2021, 8:02 PM

Revision Contents

Path

Size

llvm/

lib/

Transforms/

Vectorize/

SLPVectorizer.cpp

49 lines

test/

Transforms/

SLPVectorizer/

AArch64/

overlapping-vector-loads.ll

62 lines

Diff 367306

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,879 Lines • ▼ Show 20 Lines	TreeEntry *newTreeEntry(ArrayRef<Value *> VL,
Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());		Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
Last->State = EntryState;		Last->State = EntryState;
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),		Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
ReuseShuffleIndices.end());		ReuseShuffleIndices.end());
Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());		Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
Last->setOperations(S);		Last->setOperations(S);
if (Last->State != TreeEntry::NeedToGather) {		if (Last->State != TreeEntry::NeedToGather) {
for (Value *V : VL) {		for (Value *V : VL) {
assert(!getTreeEntry(V) && "Scalar already in tree!");		if (getTreeEntry(V) && getTreeEntry(V)->Scalars[0] == V)
		continue;
		// assert(!getTreeEntry(V) && "Scalar already in tree!");
ScalarToTreeEntry[V] = Last;		ScalarToTreeEntry[V] = Last;
}		}
// Update the scheduler bundle to point to this TreeEntry.		// Update the scheduler bundle to point to this TreeEntry.
unsigned Lane = 0;		unsigned Lane = 0;
for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember;		for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember;
BundleMember = BundleMember->NextInBundle) {		BundleMember = BundleMember->NextInBundle) {
BundleMember->TE = Last;		BundleMember->TE = Last;
BundleMember->Lane = Lane;		BundleMember->Lane = Lane;
++Lane;		++Lane;
}		}
assert((!Bundle.getValue() || Lane == VL.size()) &&		// assert((!Bundle.getValue() || Lane == VL.size()) &&
"Bundle and VL out of sync");		//"Bundle and VL out of sync");
} else {		} else {
MustGather.insert(VL.begin(), VL.end());		MustGather.insert(VL.begin(), VL.end());
}		}

if (UserTreeIdx.UserTE)		if (UserTreeIdx.UserTE)
Last->UserTreeIndices.push_back(UserTreeIdx);		Last->UserTreeIndices.push_back(UserTreeIdx);

return Last;		return Last;
▲ Show 20 Lines • Show All 330 Lines • ▼ Show 20 Lines	void schedule(ScheduleData *SD, ReadyListType &ReadyList) {
});		});
};		};

// If BundleMember is a vector bundle, its operands may have been		// If BundleMember is a vector bundle, its operands may have been
// reordered duiring buildTree(). We therefore need to get its operands		// reordered duiring buildTree(). We therefore need to get its operands
// through the TreeEntry.		// through the TreeEntry.
if (TreeEntry *TE = BundleMember->TE) {		if (TreeEntry *TE = BundleMember->TE) {
int Lane = BundleMember->Lane;		int Lane = BundleMember->Lane;
		if (Lane >= TE->getOperand(0).size())
		break;

assert(Lane >= 0 && "Lane not set");		assert(Lane >= 0 && "Lane not set");

// Since vectorization tree is being built recursively this assertion		// Since vectorization tree is being built recursively this assertion
// ensures that the tree entry has all operands set before reaching		// ensures that the tree entry has all operands set before reaching
// this code. Couple of exceptions known at the moment are extracts		// this code. Couple of exceptions known at the moment are extracts
// where their second (immediate) operand is not added. Since		// where their second (immediate) operand is not added. Since
// immediates do not affect scheduler behavior this is considered		// immediates do not affect scheduler behavior this is considered
// okay.		// okay.
▲ Show 20 Lines • Show All 463 Lines • ▼ Show 20 Lines	if (EphValues.count(V)) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V		LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
<< ") is ephemeral.\n");		<< ") is ephemeral.\n");
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);		newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;		return;
}		}
}		}

// Check if this is a duplicate of another entry.		// Check if this is a duplicate of another entry.
if (TreeEntry *E = getTreeEntry(S.OpValue)) {		// if (TreeEntry *E = getTreeEntry(S.OpValue)) {
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");		// LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
if (!E->isSame(VL)) {		// if (!E->isSame(VL)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");		// LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);		// newTreeEntry(VL, None [>not vectorized<], S, UserTreeIdx);
return;		// return;
}		//}
// Record the reuse of the tree node. FIXME, currently this is only used to		//// Record the reuse of the tree node. FIXME, currently this is only used to
// properly draw the graph rather than for the actual vectorization.		//// properly draw the graph rather than for the actual vectorization.
E->UserTreeIndices.push_back(UserTreeIdx);		// E->UserTreeIndices.push_back(UserTreeIdx);
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue		// LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
<< ".\n");		//<< ".\n");
return;		// return;
}		//}

// Check that none of the instructions in the bundle are already in the tree.		// Check that none of the instructions in the bundle are already in the tree.
for (Value *V : VL) {		unsigned Idx = 0;
		for (unsigned Idx = 0, E = VL.size(); Idx != E; ++Idx) {
		Value *V = VL[Idx];
auto *I = dyn_cast<Instruction>(V);		auto *I = dyn_cast<Instruction>(V);
if (!I)		if (!I)
continue;		continue;
if (getTreeEntry(I)) {		if (getTreeEntry(I) && getTreeEntry(I)->Scalars[0] == I && Idx == 0) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V		LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
<< ") is already in tree.\n");		<< ") is already in tree.\n");
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);		newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;		return;
}		}
}		}

// If any of the scalars is marked as a value that needs to stay scalar, then		// If any of the scalars is marked as a value that needs to stay scalar, then
▲ Show 20 Lines • Show All 3,243 Lines • ▼ Show 20 Lines	for (Value *V : VL) {
if (BundleMember->IsScheduled) {		if (BundleMember->IsScheduled) {
// A bundle member was scheduled as single instruction before and now		// A bundle member was scheduled as single instruction before and now
// needs to be scheduled as part of the bundle. We just get rid of the		// needs to be scheduled as part of the bundle. We just get rid of the
// existing schedule.		// existing schedule.
LLVM_DEBUG(dbgs() << "SLP: reset schedule because " << *BundleMember		LLVM_DEBUG(dbgs() << "SLP: reset schedule because " << *BundleMember
<< " was already scheduled\n");		<< " was already scheduled\n");
ReSchedule = true;		ReSchedule = true;
}		}
assert(BundleMember->isSchedulingEntity() &&		// assert(BundleMember->isSchedulingEntity() &&
"bundle member already part of other bundle");		//"bundle member already part of other bundle");
if (PrevInBundle) {		if (PrevInBundle) {
PrevInBundle->NextInBundle = BundleMember;		PrevInBundle->NextInBundle = BundleMember;
} else {		} else {
Bundle = BundleMember;		Bundle = BundleMember;
}		}
BundleMember->UnscheduledDepsInBundle = 0;		BundleMember->UnscheduledDepsInBundle = 0;
Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps;		Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps;

▲ Show 20 Lines • Show All 2,841 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/AArch64/overlapping-vector-loads.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -slp-vectorizer -mtriple=arm64-apple-darwin -S %s | FileCheck %s			; RUN: opt -slp-vectorizer -mtriple=arm64-apple-darwin -S %s | FileCheck %s

	; The adds in the function operate on 2 overlapping vectors <l1, l2, l3, l4>			; The adds in the function operate on 2 overlapping vectors <l1, l2, l3, l4>
	; and <l0, l1, l2, l3>. Both vectors can be loaded from memory efficiently.			; and <l0, l1, l2, l3>. Both vectors can be loaded from memory efficiently.
	define void @two_overlapping_loads_with_offset_1(i32* nocapture readonly %s, i32* noalias nocapture %d) {			define void @two_overlapping_loads_with_offset_1(i32* nocapture readonly %s, i32* noalias nocapture %d) {
	; CHECK-LABEL: @two_overlapping_loads_with_offset_1(			; CHECK-LABEL: @two_overlapping_loads_with_offset_1(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[S:%.*]], align 4			; CHECK-NEXT: [[ARRAYIDX1_3:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 4
	; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 1			; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 1
	; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
	; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L1]], [[L0]]
	; CHECK-NEXT: store i32 [[ADD]], i32* [[D:%.*]], align 4
	; CHECK-NEXT: [[ARRAYIDX1_1:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 2			; CHECK-NEXT: [[ARRAYIDX1_1:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 2
	; CHECK-NEXT: [[L2:%.*]] = load i32, i32* [[ARRAYIDX1_1]], align 4			; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[D:%.*]], i64 1
	; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[L2]], [[L1]]
	; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 1
	; CHECK-NEXT: store i32 [[ADD_1]], i32* [[ARRAYIDX2_1]], align 4
	; CHECK-NEXT: [[ARRAYIDX1_2:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 3			; CHECK-NEXT: [[ARRAYIDX1_2:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 3
	; CHECK-NEXT: [[L3:%.*]] = load i32, i32* [[ARRAYIDX1_2]], align 4			; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[S]] to <4 x i32>*
	; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[L3]], [[L2]]			; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
				; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>*
				; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
	; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 2			; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 2
	; CHECK-NEXT: store i32 [[ADD_2]], i32* [[ARRAYIDX2_2]], align 4			; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
	; CHECK-NEXT: [[ARRAYIDX1_3:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 4
	; CHECK-NEXT: [[L4:%.*]] = load i32, i32* [[ARRAYIDX1_3]], align 4
	; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[L4]], [[L3]]
	; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 3			; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 3
	; CHECK-NEXT: store i32 [[ADD_3]], i32* [[ARRAYIDX2_3]], align 4			; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[D]] to <4 x i32>*
				; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	entry:			entry:
	%l0 = load i32, i32* %s, align 4			%l0 = load i32, i32* %s, align 4
	%arrayidx1 = getelementptr inbounds i32, i32* %s, i64 1			%arrayidx1 = getelementptr inbounds i32, i32* %s, i64 1
	%l1 = load i32, i32* %arrayidx1, align 4			%l1 = load i32, i32* %arrayidx1, align 4
	%add = add nsw i32 %l1, %l0			%add = add nsw i32 %l1, %l0
	store i32 %add, i32* %d, align 4			store i32 %add, i32* %d, align 4
	Show All 22 Lines
	; CHECK-LABEL: @four_overlapping_loads_with_offset_1(			; CHECK-LABEL: @four_overlapping_loads_with_offset_1(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 1			; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 1
	; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 2			; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 2
	; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 3			; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 3
	; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 4			; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 4
	; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 5			; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 5
	; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 6			; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 6
	; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[S]], align 4			; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX3]] to <4 x i32>*
	; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4			; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
	; CHECK-NEXT: [[L2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4			; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
	; CHECK-NEXT: [[L3:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4			; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
	; CHECK-NEXT: [[L4:%.*]] = load i32, i32* [[ARRAYIDX3_1]], align 4			; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[S]] to <4 x i32>*
	; CHECK-NEXT: [[L5:%.*]] = load i32, i32* [[ARRAYIDX3_2]], align 4			; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
	; CHECK-NEXT: [[L6:%.*]] = load i32, i32* [[ARRAYIDX3_3]], align 4			; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>*
	; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L1]], [[L0]]			; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
	; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[ADD]], [[L2]]			; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i32, i32* [[D:%.*]], i64 1
	; CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[SUB]], [[L3]]
	; CHECK-NEXT: store i32 [[ADD4]], i32* [[D:%.*]], align 4
	; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[L2]], [[L1]]
	; CHECK-NEXT: [[SUB_1:%.*]] = sub i32 [[ADD_1]], [[L3]]
	; CHECK-NEXT: [[ADD4_1:%.*]] = add nsw i32 [[SUB_1]], [[L4]]
	; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 1
	; CHECK-NEXT: store i32 [[ADD4_1]], i32* [[ARRAYIDX5_1]], align 4
	; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[L3]], [[L2]]
	; CHECK-NEXT: [[SUB_2:%.*]] = sub i32 [[ADD_2]], [[L4]]
	; CHECK-NEXT: [[ADD4_2:%.*]] = add nsw i32 [[SUB_2]], [[L5]]
	; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 2			; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 2
	; CHECK-NEXT: store i32 [[ADD4_2]], i32* [[ARRAYIDX5_2]], align 4			; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP5]]
	; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[L4]], [[L3]]			; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP8]], [[TMP3]]
	; CHECK-NEXT: [[SUB_3:%.*]] = sub i32 [[ADD_3]], [[L5]]			; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], [[TMP1]]
	; CHECK-NEXT: [[ADD4_3:%.*]] = add nsw i32 [[SUB_3]], [[L6]]
	; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 3			; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 3
	; CHECK-NEXT: store i32 [[ADD4_3]], i32* [[ARRAYIDX5_3]], align 4			; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[D]] to <4 x i32>*
				; CHECK-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP11]], align 4
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	entry:			entry:
	%arrayidx1 = getelementptr inbounds i32, i32* %s, i64 1			%arrayidx1 = getelementptr inbounds i32, i32* %s, i64 1
	%arrayidx2 = getelementptr inbounds i32, i32* %s, i64 2			%arrayidx2 = getelementptr inbounds i32, i32* %s, i64 2
	%arrayidx3 = getelementptr inbounds i32, i32* %s, i64 3			%arrayidx3 = getelementptr inbounds i32, i32* %s, i64 3
	%arrayidx3.1 = getelementptr inbounds i32, i32* %s, i64 4			%arrayidx3.1 = getelementptr inbounds i32, i32* %s, i64 4
	%arrayidx3.2 = getelementptr inbounds i32, i32* %s, i64 5			%arrayidx3.2 = getelementptr inbounds i32, i32* %s, i64 5
	Show All 29 Lines