This is an archive of the discontinued LLVM Phabricator instance.

[DAGCombiner] allow store merging non-i8 truncated ops
ClosedPublic

Authored by spatel on Aug 23 2020, 6:25 AM.

Download Raw Diff

Details

Reviewers

efriedma
RKSimon
craig.topper
dmgreen

Commits

rG54a5dd485c4d: [DAGCombiner] allow store merging non-i8 truncated ops

Summary

We have a gap in our store merging capabilities for shift+truncate patterns as discussed in:
https://llvm.org/PR46662

I generalized the code/comments for this function in earlier commits, so we only need to ease the type restriction and adjust the address/endian checking to make this work.
AArch64 lets us switch endianness to make sure that patterns are matched either way.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

spatel created this revision. · Aug 23 2020, 6:25 AM

Herald added a project: Restricted Project. · View Herald Transcript · Aug 23 2020, 6:25 AM

Herald added subscribers: ecnelises, hiraditya, kristof.beyls, mcrosier. · View Herald Transcript

spatel requested review of this revision. · Aug 23 2020, 6:25 AM

RKSimon edited the summary of this revision. (Show Details) · Aug 26 2020, 2:35 AM

LGTM - would you be able to get the "pair swap" rotate tweak done as a followup? Ideally with x86 test coverage as well.

This revision is now accepted and ready to land. · Aug 26 2020, 2:42 AM

In D86420#2238310, @RKSimon wrote:

LGTM - would you be able to get the "pair swap" rotate tweak done as a followup? Ideally with x86 test coverage as well.

Yes, that doesn't seem like a big add-on. We allowed something like that in D83567.
Also, this code sits outside of all of the other store merging that we do in DAGCombiner, so there's potential for refactoring.

Closed by commit rG54a5dd485c4d: [DAGCombiner] allow store merging non-i8 truncated ops (authored by spatel). · Explain Why · Aug 26 2020, 12:26 PM

This revision was automatically updated to reflect the committed changes.

spatel added a commit: rG54a5dd485c4d: [DAGCombiner] allow store merging non-i8 truncated ops.

spatel mentioned this in D87112: [DAGCombiner] allow more store merging for non-i8 truncated ops. · Sep 3 2020, 2:15 PM

spatel mentioned this in rG7a06b166b1af: [DAGCombiner] allow more store merging for non-i8 truncated ops. · Sep 7 2020, 11:12 AM

spatel mentioned this in rGdd763ac79196: [SDAG] fix miscompile from merging stores of different sizes. · Jun 9 2021, 6:56 AM

Revision Contents

Path

Size

llvm/

lib/

CodeGen/

SelectionDAG/

DAGCombiner.cpp

37 lines

test/

CodeGen/

AArch64/

merge-trunc-store.ll

240 lines

X86/

stores-merging.ll

18 lines

Diff 288078

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,863 Lines • ▼ Show 20 Lines	SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
if (LegalOperations)		if (LegalOperations)
return SDValue();		return SDValue();

// Collect all the stores in the chain.		// Collect all the stores in the chain.
SDValue Chain;		SDValue Chain;
SmallVector<StoreSDNode *, 8> Stores;		SmallVector<StoreSDNode *, 8> Stores;
for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {		for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
// TODO: Allow unordered atomics when wider type is legal (see D66309)		// TODO: Allow unordered atomics when wider type is legal (see D66309)
if (Store->getMemoryVT() != MVT::i8 \|\| !Store->isSimple() \|\|		EVT MemVT = Store->getMemoryVT();
Store->isIndexed())		if (!(MemVT == MVT::i8 \|\| MemVT == MVT::i16 \|\| MemVT == MVT::i32) \|\|
		!Store->isSimple() \|\| Store->isIndexed())
return SDValue();		return SDValue();
Stores.push_back(Store);		Stores.push_back(Store);
Chain = Store->getChain();		Chain = Store->getChain();
}		}
// There is no reason to continue if we do not have at least a pair of stores.		// There is no reason to continue if we do not have at least a pair of stores.
if (Stores.size() < 2)		if (Stores.size() < 2)
return SDValue();		return SDValue();

▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	for (auto Store : Stores) {
if (Offset < 0 \|\| Offset >= NumStores \|\| OffsetMap[Offset] != INT64_MAX)		if (Offset < 0 \|\| Offset >= NumStores \|\| OffsetMap[Offset] != INT64_MAX)
return SDValue();		return SDValue();
OffsetMap[Offset] = ByteOffsetFromBase;		OffsetMap[Offset] = ByteOffsetFromBase;
}		}

assert(FirstOffset != INT64_MAX && "First byte offset must be set");		assert(FirstOffset != INT64_MAX && "First byte offset must be set");
assert(FirstStore && "First store must be set");		assert(FirstStore && "First store must be set");

// Check if the bytes of the combined value we are looking at match with
// either big or little endian value store.
Optional<bool> IsBigEndian = isBigEndian(OffsetMap, FirstOffset);
if (!IsBigEndian.hasValue())
return SDValue();

// Check that a store of the wide type is both allowed and fast on the target		// Check that a store of the wide type is both allowed and fast on the target
const DataLayout &Layout = DAG.getDataLayout();		const DataLayout &Layout = DAG.getDataLayout();
bool Fast = false;		bool Fast = false;
bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,		bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
*FirstStore->getMemOperand(), &Fast);		*FirstStore->getMemOperand(), &Fast);
if (!Allowed \|\| !Fast)		if (!Allowed \|\| !Fast)
return SDValue();		return SDValue();

		// Check if the pieces of the value are going to the expected places in memory
		// to merge the stores.
		auto checkOffsets = [&](bool MatchLittleEndian) {
		if (MatchLittleEndian) {
		for (unsigned i = 0; i != NumStores; ++i)
		if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
		return false;
		} else { // MatchBigEndian by reversing loop counter.
		for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
		if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
		return false;
		}
		return true;
		};

		// Check if the offsets line up for the native data layout of this target.
		bool NeedBswap = false;
		if (!checkOffsets(Layout.isLittleEndian())) {
		// Special-case: check if byte offsets line up for the opposite endian.
		// TODO: We could use rotates for 16/32-bit merge pairs.
		if (NarrowNumBits != 8 \|\| !checkOffsets(Layout.isBigEndian()))
		return SDValue();
		NeedBswap = true;
		}

SDLoc DL(N);		SDLoc DL(N);
if (WideVT != SourceValue.getValueType()) {		if (WideVT != SourceValue.getValueType()) {
assert(SourceValue.getValueType().getSizeInBits() > WideNumBits &&		assert(SourceValue.getValueType().getSizeInBits() > WideNumBits &&
"Unexpected store value to merge");		"Unexpected store value to merge");
SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);		SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
}		}

// Before legalize we can introduce illegal bswaps which will be later		// Before legalize we can introduce illegal bswaps which will be later
// converted to an explicit bswap sequence. This way we end up with a single		// converted to an explicit bswap sequence. This way we end up with a single
// store and byte shuffling instead of several stores and byte shuffling.		// store and byte shuffling instead of several stores and byte shuffling.
bool NeedBswap = Layout.isBigEndian() != *IsBigEndian;
if (NeedBswap)		if (NeedBswap)
SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);		SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);

SDValue NewStore =		SDValue NewStore =
DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),		DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
FirstStore->getPointerInfo(), FirstStore->getAlignment());		FirstStore->getPointerInfo(), FirstStore->getAlignment());

// Rely on other DAG combine rules to remove the other individual stores.		// Rely on other DAG combine rules to remove the other individual stores.
▲ Show 20 Lines • Show All 15,210 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/merge-trunc-store.ll

Show First 20 Lines • Show All 194 Lines • ▼ Show 20 Lines	; BE-NEXT: ret
store i8 %t3, i8* %p0, align 1		store i8 %t3, i8* %p0, align 1
store i8 %t2, i8* %p1, align 1		store i8 %t2, i8* %p1, align 1
store i8 %t0, i8* %p3, align 1		store i8 %t0, i8* %p3, align 1
store i8 %t1, i8* %p2, align 1		store i8 %t1, i8* %p2, align 1
ret void		ret void
}		}

define void @le_i32_to_i16(i32 %x, i16* %p0) {		define void @le_i32_to_i16(i32 %x, i16* %p0) {
; CHECK-LABEL: le_i32_to_i16:		; LE-LABEL: le_i32_to_i16:
; CHECK: // %bb.0:		; LE: // %bb.0:
; CHECK-NEXT: lsr w8, w0, #16		; LE-NEXT: str w0, [x1]
; CHECK-NEXT: strh w0, [x1]		; LE-NEXT: ret
; CHECK-NEXT: strh w8, [x1, #2]		;
; CHECK-NEXT: ret		; BE-LABEL: le_i32_to_i16:
		; BE: // %bb.0:
		; BE-NEXT: lsr w8, w0, #16
		; BE-NEXT: strh w0, [x1]
		; BE-NEXT: strh w8, [x1, #2]
		; BE-NEXT: ret
%sh1 = lshr i32 %x, 16		%sh1 = lshr i32 %x, 16
%t0 = trunc i32 %x to i16		%t0 = trunc i32 %x to i16
%t1 = trunc i32 %sh1 to i16		%t1 = trunc i32 %sh1 to i16
%p1 = getelementptr inbounds i16, i16* %p0, i64 1		%p1 = getelementptr inbounds i16, i16* %p0, i64 1
store i16 %t0, i16* %p0, align 2		store i16 %t0, i16* %p0, align 2
store i16 %t1, i16* %p1, align 2		store i16 %t1, i16* %p1, align 2
ret void		ret void
}		}

define void @le_i32_to_i16_order(i32 %x, i16* %p0) {		define void @le_i32_to_i16_order(i32 %x, i16* %p0) {
; CHECK-LABEL: le_i32_to_i16_order:		; LE-LABEL: le_i32_to_i16_order:
; CHECK: // %bb.0:		; LE: // %bb.0:
; CHECK-NEXT: lsr w8, w0, #16		; LE-NEXT: str w0, [x1]
; CHECK-NEXT: strh w8, [x1, #2]		; LE-NEXT: ret
; CHECK-NEXT: strh w0, [x1]		;
; CHECK-NEXT: ret		; BE-LABEL: le_i32_to_i16_order:
		; BE: // %bb.0:
		; BE-NEXT: lsr w8, w0, #16
		; BE-NEXT: strh w8, [x1, #2]
		; BE-NEXT: strh w0, [x1]
		; BE-NEXT: ret
%sh1 = lshr i32 %x, 16		%sh1 = lshr i32 %x, 16
%t0 = trunc i32 %x to i16		%t0 = trunc i32 %x to i16
%t1 = trunc i32 %sh1 to i16		%t1 = trunc i32 %sh1 to i16
%p1 = getelementptr inbounds i16, i16* %p0, i64 1		%p1 = getelementptr inbounds i16, i16* %p0, i64 1
store i16 %t1, i16* %p1, align 2		store i16 %t1, i16* %p1, align 2
store i16 %t0, i16* %p0, align 2		store i16 %t0, i16* %p0, align 2
ret void		ret void
}		}

define void @be_i32_to_i16(i32 %x, i16* %p0) {		define void @be_i32_to_i16(i32 %x, i16* %p0) {
; CHECK-LABEL: be_i32_to_i16:		; LE-LABEL: be_i32_to_i16:
; CHECK: // %bb.0:		; LE: // %bb.0:
; CHECK-NEXT: lsr w8, w0, #16		; LE-NEXT: lsr w8, w0, #16
; CHECK-NEXT: strh w0, [x1, #2]		; LE-NEXT: strh w0, [x1, #2]
; CHECK-NEXT: strh w8, [x1]		; LE-NEXT: strh w8, [x1]
; CHECK-NEXT: ret		; LE-NEXT: ret
		;
		; BE-LABEL: be_i32_to_i16:
		; BE: // %bb.0:
		; BE-NEXT: str w0, [x1]
		; BE-NEXT: ret
%sh1 = lshr i32 %x, 16		%sh1 = lshr i32 %x, 16
%t0 = trunc i32 %x to i16		%t0 = trunc i32 %x to i16
%t1 = trunc i32 %sh1 to i16		%t1 = trunc i32 %sh1 to i16
%p1 = getelementptr inbounds i16, i16* %p0, i64 1		%p1 = getelementptr inbounds i16, i16* %p0, i64 1
store i16 %t0, i16* %p1, align 2		store i16 %t0, i16* %p1, align 2
store i16 %t1, i16* %p0, align 2		store i16 %t1, i16* %p0, align 2
ret void		ret void
}		}

define void @be_i32_to_i16_order(i32 %x, i16* %p0) {		define void @be_i32_to_i16_order(i32 %x, i16* %p0) {
; CHECK-LABEL: be_i32_to_i16_order:		; LE-LABEL: be_i32_to_i16_order:
; CHECK: // %bb.0:		; LE: // %bb.0:
; CHECK-NEXT: lsr w8, w0, #16		; LE-NEXT: lsr w8, w0, #16
; CHECK-NEXT: strh w8, [x1]		; LE-NEXT: strh w8, [x1]
; CHECK-NEXT: strh w0, [x1, #2]		; LE-NEXT: strh w0, [x1, #2]
; CHECK-NEXT: ret		; LE-NEXT: ret
		;
		; BE-LABEL: be_i32_to_i16_order:
		; BE: // %bb.0:
		; BE-NEXT: str w0, [x1]
		; BE-NEXT: ret
%sh1 = lshr i32 %x, 16		%sh1 = lshr i32 %x, 16
%t0 = trunc i32 %x to i16		%t0 = trunc i32 %x to i16
%t1 = trunc i32 %sh1 to i16		%t1 = trunc i32 %sh1 to i16
%p1 = getelementptr inbounds i16, i16* %p0, i64 1		%p1 = getelementptr inbounds i16, i16* %p0, i64 1
store i16 %t1, i16* %p0, align 2		store i16 %t1, i16* %p0, align 2
store i16 %t0, i16* %p1, align 2		store i16 %t0, i16* %p1, align 2
ret void		ret void
}		}
▲ Show 20 Lines • Show All 170 Lines • ▼ Show 20 Lines	; BE-NEXT: ret
store i8 %t3, i8* %p4, align 1		store i8 %t3, i8* %p4, align 1
store i8 %t2, i8* %p5, align 1		store i8 %t2, i8* %p5, align 1
store i8 %t1, i8* %p6, align 1		store i8 %t1, i8* %p6, align 1
store i8 %t0, i8* %p7, align 1		store i8 %t0, i8* %p7, align 1
ret void		ret void
}		}

define void @le_i64_to_i16(i64 %x, i16* %p0) {		define void @le_i64_to_i16(i64 %x, i16* %p0) {
; CHECK-LABEL: le_i64_to_i16:		; LE-LABEL: le_i64_to_i16:
; CHECK: // %bb.0:		; LE: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #16		; LE-NEXT: str x0, [x1]
; CHECK-NEXT: lsr x9, x0, #32		; LE-NEXT: ret
; CHECK-NEXT: lsr x10, x0, #48		;
; CHECK-NEXT: strh w0, [x1]		; BE-LABEL: le_i64_to_i16:
; CHECK-NEXT: strh w8, [x1, #2]		; BE: // %bb.0:
; CHECK-NEXT: strh w9, [x1, #4]		; BE-NEXT: lsr x8, x0, #16
; CHECK-NEXT: strh w10, [x1, #6]		; BE-NEXT: lsr x9, x0, #32
; CHECK-NEXT: ret		; BE-NEXT: lsr x10, x0, #48
		; BE-NEXT: strh w0, [x1]
		; BE-NEXT: strh w8, [x1, #2]
		; BE-NEXT: strh w9, [x1, #4]
		; BE-NEXT: strh w10, [x1, #6]
		; BE-NEXT: ret
%sh1 = lshr i64 %x, 16		%sh1 = lshr i64 %x, 16
%sh2 = lshr i64 %x, 32		%sh2 = lshr i64 %x, 32
%sh3 = lshr i64 %x, 48		%sh3 = lshr i64 %x, 48
%t0 = trunc i64 %x to i16		%t0 = trunc i64 %x to i16
%t1 = trunc i64 %sh1 to i16		%t1 = trunc i64 %sh1 to i16
%t2 = trunc i64 %sh2 to i16		%t2 = trunc i64 %sh2 to i16
%t3 = trunc i64 %sh3 to i16		%t3 = trunc i64 %sh3 to i16
%p1 = getelementptr inbounds i16, i16* %p0, i64 1		%p1 = getelementptr inbounds i16, i16* %p0, i64 1
%p2 = getelementptr inbounds i16, i16* %p0, i64 2		%p2 = getelementptr inbounds i16, i16* %p0, i64 2
%p3 = getelementptr inbounds i16, i16* %p0, i64 3		%p3 = getelementptr inbounds i16, i16* %p0, i64 3
store i16 %t0, i16* %p0, align 2		store i16 %t0, i16* %p0, align 2
store i16 %t1, i16* %p1, align 2		store i16 %t1, i16* %p1, align 2
store i16 %t2, i16* %p2, align 2		store i16 %t2, i16* %p2, align 2
store i16 %t3, i16* %p3, align 2		store i16 %t3, i16* %p3, align 2
ret void		ret void
}		}

define void @le_i64_to_i16_order(i64 %x, i16* %p0) {		define void @le_i64_to_i16_order(i64 %x, i16* %p0) {
; CHECK-LABEL: le_i64_to_i16_order:		; LE-LABEL: le_i64_to_i16_order:
; CHECK: // %bb.0:		; LE: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #16		; LE-NEXT: str x0, [x1]
; CHECK-NEXT: lsr x9, x0, #32		; LE-NEXT: ret
; CHECK-NEXT: lsr x10, x0, #48		;
; CHECK-NEXT: strh w0, [x1]		; BE-LABEL: le_i64_to_i16_order:
; CHECK-NEXT: strh w8, [x1, #2]		; BE: // %bb.0:
; CHECK-NEXT: strh w10, [x1, #6]		; BE-NEXT: lsr x8, x0, #16
; CHECK-NEXT: strh w9, [x1, #4]		; BE-NEXT: lsr x9, x0, #32
; CHECK-NEXT: ret		; BE-NEXT: lsr x10, x0, #48
		; BE-NEXT: strh w0, [x1]
		; BE-NEXT: strh w8, [x1, #2]
		; BE-NEXT: strh w10, [x1, #6]
		; BE-NEXT: strh w9, [x1, #4]
		; BE-NEXT: ret
%sh1 = lshr i64 %x, 16		%sh1 = lshr i64 %x, 16
%sh2 = lshr i64 %x, 32		%sh2 = lshr i64 %x, 32
%sh3 = lshr i64 %x, 48		%sh3 = lshr i64 %x, 48
%t0 = trunc i64 %x to i16		%t0 = trunc i64 %x to i16
%t1 = trunc i64 %sh1 to i16		%t1 = trunc i64 %sh1 to i16
%t2 = trunc i64 %sh2 to i16		%t2 = trunc i64 %sh2 to i16
%t3 = trunc i64 %sh3 to i16		%t3 = trunc i64 %sh3 to i16
%p1 = getelementptr inbounds i16, i16* %p0, i64 1		%p1 = getelementptr inbounds i16, i16* %p0, i64 1
%p2 = getelementptr inbounds i16, i16* %p0, i64 2		%p2 = getelementptr inbounds i16, i16* %p0, i64 2
%p3 = getelementptr inbounds i16, i16* %p0, i64 3		%p3 = getelementptr inbounds i16, i16* %p0, i64 3
store i16 %t1, i16* %p1, align 2		store i16 %t1, i16* %p1, align 2
store i16 %t3, i16* %p3, align 2		store i16 %t3, i16* %p3, align 2
store i16 %t0, i16* %p0, align 2		store i16 %t0, i16* %p0, align 2
store i16 %t2, i16* %p2, align 2		store i16 %t2, i16* %p2, align 2
ret void		ret void
}		}

define void @be_i64_to_i16(i64 %x, i16* %p0) {		define void @be_i64_to_i16(i64 %x, i16* %p0) {
; CHECK-LABEL: be_i64_to_i16:		; LE-LABEL: be_i64_to_i16:
; CHECK: // %bb.0:		; LE: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #16		; LE-NEXT: lsr x8, x0, #16
; CHECK-NEXT: lsr x9, x0, #32		; LE-NEXT: lsr x9, x0, #32
; CHECK-NEXT: lsr x10, x0, #48		; LE-NEXT: lsr x10, x0, #48
; CHECK-NEXT: strh w0, [x1, #6]		; LE-NEXT: strh w0, [x1, #6]
; CHECK-NEXT: strh w8, [x1, #4]		; LE-NEXT: strh w8, [x1, #4]
; CHECK-NEXT: strh w9, [x1, #2]		; LE-NEXT: strh w9, [x1, #2]
; CHECK-NEXT: strh w10, [x1]		; LE-NEXT: strh w10, [x1]
; CHECK-NEXT: ret		; LE-NEXT: ret
		;
		; BE-LABEL: be_i64_to_i16:
		; BE: // %bb.0:
		; BE-NEXT: str x0, [x1]
		; BE-NEXT: ret
%sh1 = lshr i64 %x, 16		%sh1 = lshr i64 %x, 16
%sh2 = lshr i64 %x, 32		%sh2 = lshr i64 %x, 32
%sh3 = lshr i64 %x, 48		%sh3 = lshr i64 %x, 48
%t0 = trunc i64 %x to i16		%t0 = trunc i64 %x to i16
%t1 = trunc i64 %sh1 to i16		%t1 = trunc i64 %sh1 to i16
%t2 = trunc i64 %sh2 to i16		%t2 = trunc i64 %sh2 to i16
%t3 = trunc i64 %sh3 to i16		%t3 = trunc i64 %sh3 to i16
%p1 = getelementptr inbounds i16, i16* %p0, i64 1		%p1 = getelementptr inbounds i16, i16* %p0, i64 1
%p2 = getelementptr inbounds i16, i16* %p0, i64 2		%p2 = getelementptr inbounds i16, i16* %p0, i64 2
%p3 = getelementptr inbounds i16, i16* %p0, i64 3		%p3 = getelementptr inbounds i16, i16* %p0, i64 3
store i16 %t0, i16* %p3, align 2		store i16 %t0, i16* %p3, align 2
store i16 %t1, i16* %p2, align 2		store i16 %t1, i16* %p2, align 2
store i16 %t2, i16* %p1, align 2		store i16 %t2, i16* %p1, align 2
store i16 %t3, i16* %p0, align 2		store i16 %t3, i16* %p0, align 2
ret void		ret void
}		}

define void @be_i64_to_i16_order(i64 %x, i16* %p0) {		define void @be_i64_to_i16_order(i64 %x, i16* %p0) {
; CHECK-LABEL: be_i64_to_i16_order:		; LE-LABEL: be_i64_to_i16_order:
; CHECK: // %bb.0:		; LE: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #16		; LE-NEXT: lsr x8, x0, #16
; CHECK-NEXT: lsr x9, x0, #32		; LE-NEXT: lsr x9, x0, #32
; CHECK-NEXT: lsr x10, x0, #48		; LE-NEXT: lsr x10, x0, #48
; CHECK-NEXT: strh w0, [x1, #6]		; LE-NEXT: strh w0, [x1, #6]
; CHECK-NEXT: strh w10, [x1]		; LE-NEXT: strh w10, [x1]
; CHECK-NEXT: strh w9, [x1, #2]		; LE-NEXT: strh w9, [x1, #2]
; CHECK-NEXT: strh w8, [x1, #4]		; LE-NEXT: strh w8, [x1, #4]
; CHECK-NEXT: ret		; LE-NEXT: ret
		;
		; BE-LABEL: be_i64_to_i16_order:
		; BE: // %bb.0:
		; BE-NEXT: str x0, [x1]
		; BE-NEXT: ret
%sh1 = lshr i64 %x, 16		%sh1 = lshr i64 %x, 16
%sh2 = lshr i64 %x, 32		%sh2 = lshr i64 %x, 32
%sh3 = lshr i64 %x, 48		%sh3 = lshr i64 %x, 48
%t0 = trunc i64 %x to i16		%t0 = trunc i64 %x to i16
%t1 = trunc i64 %sh1 to i16		%t1 = trunc i64 %sh1 to i16
%t2 = trunc i64 %sh2 to i16		%t2 = trunc i64 %sh2 to i16
%t3 = trunc i64 %sh3 to i16		%t3 = trunc i64 %sh3 to i16
%p1 = getelementptr inbounds i16, i16* %p0, i64 1		%p1 = getelementptr inbounds i16, i16* %p0, i64 1
%p2 = getelementptr inbounds i16, i16* %p0, i64 2		%p2 = getelementptr inbounds i16, i16* %p0, i64 2
%p3 = getelementptr inbounds i16, i16* %p0, i64 3		%p3 = getelementptr inbounds i16, i16* %p0, i64 3
store i16 %t0, i16* %p3, align 2		store i16 %t0, i16* %p3, align 2
store i16 %t3, i16* %p0, align 2		store i16 %t3, i16* %p0, align 2
store i16 %t2, i16* %p1, align 2		store i16 %t2, i16* %p1, align 2
store i16 %t1, i16* %p2, align 2		store i16 %t1, i16* %p2, align 2
ret void		ret void
}		}

define void @le_i64_to_i32(i64 %x, i32* %p0) {		define void @le_i64_to_i32(i64 %x, i32* %p0) {
; CHECK-LABEL: le_i64_to_i32:		; LE-LABEL: le_i64_to_i32:
; CHECK: // %bb.0:		; LE: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #32		; LE-NEXT: str x0, [x1]
; CHECK-NEXT: stp w0, w8, [x1]		; LE-NEXT: ret
; CHECK-NEXT: ret		;
		; BE-LABEL: le_i64_to_i32:
		; BE: // %bb.0:
		; BE-NEXT: lsr x8, x0, #32
		; BE-NEXT: stp w0, w8, [x1]
		; BE-NEXT: ret
%sh1 = lshr i64 %x, 32		%sh1 = lshr i64 %x, 32
%t0 = trunc i64 %x to i32		%t0 = trunc i64 %x to i32
%t1 = trunc i64 %sh1 to i32		%t1 = trunc i64 %sh1 to i32
%p1 = getelementptr inbounds i32, i32* %p0, i64 1		%p1 = getelementptr inbounds i32, i32* %p0, i64 1
store i32 %t0, i32* %p0, align 4		store i32 %t0, i32* %p0, align 4
store i32 %t1, i32* %p1, align 4		store i32 %t1, i32* %p1, align 4
ret void		ret void
}		}

define void @le_i64_to_i32_order(i64 %x, i32* %p0) {		define void @le_i64_to_i32_order(i64 %x, i32* %p0) {
; CHECK-LABEL: le_i64_to_i32_order:		; LE-LABEL: le_i64_to_i32_order:
; CHECK: // %bb.0:		; LE: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #32		; LE-NEXT: str x0, [x1]
; CHECK-NEXT: stp w0, w8, [x1]		; LE-NEXT: ret
; CHECK-NEXT: ret		;
		; BE-LABEL: le_i64_to_i32_order:
		; BE: // %bb.0:
		; BE-NEXT: lsr x8, x0, #32
		; BE-NEXT: stp w0, w8, [x1]
		; BE-NEXT: ret
%sh1 = lshr i64 %x, 32		%sh1 = lshr i64 %x, 32
%t0 = trunc i64 %x to i32		%t0 = trunc i64 %x to i32
%t1 = trunc i64 %sh1 to i32		%t1 = trunc i64 %sh1 to i32
%p1 = getelementptr inbounds i32, i32* %p0, i64 1		%p1 = getelementptr inbounds i32, i32* %p0, i64 1
store i32 %t1, i32* %p1, align 4		store i32 %t1, i32* %p1, align 4
store i32 %t0, i32* %p0, align 4		store i32 %t0, i32* %p0, align 4
ret void		ret void
}		}

define void @be_i64_to_i32(i64 %x, i32* %p0) {		define void @be_i64_to_i32(i64 %x, i32* %p0) {
; CHECK-LABEL: be_i64_to_i32:		; LE-LABEL: be_i64_to_i32:
; CHECK: // %bb.0:		; LE: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #32		; LE-NEXT: lsr x8, x0, #32
; CHECK-NEXT: stp w8, w0, [x1]		; LE-NEXT: stp w8, w0, [x1]
; CHECK-NEXT: ret		; LE-NEXT: ret
		;
		; BE-LABEL: be_i64_to_i32:
		; BE: // %bb.0:
		; BE-NEXT: str x0, [x1]
		; BE-NEXT: ret
%sh1 = lshr i64 %x, 32		%sh1 = lshr i64 %x, 32
%t0 = trunc i64 %x to i32		%t0 = trunc i64 %x to i32
%t1 = trunc i64 %sh1 to i32		%t1 = trunc i64 %sh1 to i32
%p1 = getelementptr inbounds i32, i32* %p0, i64 1		%p1 = getelementptr inbounds i32, i32* %p0, i64 1
store i32 %t0, i32* %p1, align 4		store i32 %t0, i32* %p1, align 4
store i32 %t1, i32* %p0, align 4		store i32 %t1, i32* %p0, align 4
ret void		ret void
}		}

define void @be_i64_to_i32_order(i64 %x, i32* %p0) {		define void @be_i64_to_i32_order(i64 %x, i32* %p0) {
; CHECK-LABEL: be_i64_to_i32_order:		; LE-LABEL: be_i64_to_i32_order:
; CHECK: // %bb.0:		; LE: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #32		; LE-NEXT: lsr x8, x0, #32
; CHECK-NEXT: stp w8, w0, [x1]		; LE-NEXT: stp w8, w0, [x1]
; CHECK-NEXT: ret		; LE-NEXT: ret
		;
		; BE-LABEL: be_i64_to_i32_order:
		; BE: // %bb.0:
		; BE-NEXT: str x0, [x1]
		; BE-NEXT: ret
%sh1 = lshr i64 %x, 32		%sh1 = lshr i64 %x, 32
%t0 = trunc i64 %x to i32		%t0 = trunc i64 %x to i32
%t1 = trunc i64 %sh1 to i32		%t1 = trunc i64 %sh1 to i32
%p1 = getelementptr inbounds i32, i32* %p0, i64 1		%p1 = getelementptr inbounds i32, i32* %p0, i64 1
store i32 %t1, i32* %p0, align 4		store i32 %t1, i32* %p0, align 4
store i32 %t0, i32* %p1, align 4		store i32 %t0, i32* %p1, align 4
ret void		ret void
}		}

		; Negative test - not consecutive addresses

define void @i64_to_i32_wrong_addr(i64 %x, i32* %p0) {		define void @i64_to_i32_wrong_addr(i64 %x, i32* %p0) {
; CHECK-LABEL: i64_to_i32_wrong_addr:		; CHECK-LABEL: i64_to_i32_wrong_addr:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #32		; CHECK-NEXT: lsr x8, x0, #32
; CHECK-NEXT: str w8, [x1, #12]		; CHECK-NEXT: str w8, [x1, #12]
; CHECK-NEXT: str w0, [x1]		; CHECK-NEXT: str w0, [x1]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%sh1 = lshr i64 %x, 32		%sh1 = lshr i64 %x, 32
%t0 = trunc i64 %x to i32		%t0 = trunc i64 %x to i32
%t1 = trunc i64 %sh1 to i32		%t1 = trunc i64 %sh1 to i32
%p3 = getelementptr inbounds i32, i32* %p0, i64 3		%p3 = getelementptr inbounds i32, i32* %p0, i64 3
store i32 %t1, i32* %p3, align 4		store i32 %t1, i32* %p3, align 4
store i32 %t0, i32* %p0, align 4		store i32 %t0, i32* %p0, align 4
ret void		ret void
}		}

		; Negative test - addresses don't line up with shift amounts

define void @i64_to_i16_wrong_order(i64 %x, i16* %p0) {		define void @i64_to_i16_wrong_order(i64 %x, i16* %p0) {
; CHECK-LABEL: i64_to_i16_wrong_order:		; CHECK-LABEL: i64_to_i16_wrong_order:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #16		; CHECK-NEXT: lsr x8, x0, #16
; CHECK-NEXT: lsr x9, x0, #32		; CHECK-NEXT: lsr x9, x0, #32
; CHECK-NEXT: lsr x10, x0, #48		; CHECK-NEXT: lsr x10, x0, #48
; CHECK-NEXT: strh w10, [x1, #6]		; CHECK-NEXT: strh w10, [x1, #6]
; CHECK-NEXT: strh w8, [x1, #4]		; CHECK-NEXT: strh w8, [x1, #4]
Show All 12 Lines	; CHECK-NEXT: ret
%p3 = getelementptr inbounds i16, i16* %p0, i64 3		%p3 = getelementptr inbounds i16, i16* %p0, i64 3
store i16 %t3, i16* %p3, align 2		store i16 %t3, i16* %p3, align 2
store i16 %t1, i16* %p2, align 2		store i16 %t1, i16* %p2, align 2
store i16 %t2, i16* %p1, align 2		store i16 %t2, i16* %p1, align 2
store i16 %t0, i16* %p0, align 2		store i16 %t0, i16* %p0, align 2
ret void		ret void
}		}

		; Negative test - no store of 't1'

define void @i32_to_i8_incomplete(i32 %x, i8* %p0) {		define void @i32_to_i8_incomplete(i32 %x, i8* %p0) {
; CHECK-LABEL: i32_to_i8_incomplete:		; CHECK-LABEL: i32_to_i8_incomplete:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: lsr w8, w0, #16		; CHECK-NEXT: lsr w8, w0, #16
; CHECK-NEXT: lsr w9, w0, #24		; CHECK-NEXT: lsr w9, w0, #24
; CHECK-NEXT: strb w0, [x1]		; CHECK-NEXT: strb w0, [x1]
; CHECK-NEXT: strb w8, [x1, #2]		; CHECK-NEXT: strb w8, [x1, #2]
; CHECK-NEXT: strb w9, [x1, #3]		; CHECK-NEXT: strb w9, [x1, #3]
Show All 9 Lines	; CHECK-NEXT: ret
%p2 = getelementptr inbounds i8, i8* %p0, i64 2		%p2 = getelementptr inbounds i8, i8* %p0, i64 2
%p3 = getelementptr inbounds i8, i8* %p0, i64 3		%p3 = getelementptr inbounds i8, i8* %p0, i64 3
store i8 %t0, i8* %p0, align 1		store i8 %t0, i8* %p0, align 1
store i8 %t2, i8* %p2, align 1		store i8 %t2, i8* %p2, align 1
store i8 %t3, i8* %p3, align 1		store i8 %t3, i8* %p3, align 1
ret void		ret void
}		}

		; Negative test - no store of 't3'

define void @i64_to_i8_incomplete(i64 %x, i8* %p0) {		define void @i64_to_i8_incomplete(i64 %x, i8* %p0) {
; CHECK-LABEL: i64_to_i8_incomplete:		; CHECK-LABEL: i64_to_i8_incomplete:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #8		; CHECK-NEXT: lsr x8, x0, #8
; CHECK-NEXT: lsr x9, x0, #16		; CHECK-NEXT: lsr x9, x0, #16
; CHECK-NEXT: lsr x10, x0, #32		; CHECK-NEXT: lsr x10, x0, #32
; CHECK-NEXT: lsr x11, x0, #40		; CHECK-NEXT: lsr x11, x0, #40
; CHECK-NEXT: lsr x12, x0, #48		; CHECK-NEXT: lsr x12, x0, #48
Show All 33 Lines	; CHECK-NEXT: ret
store i8 %t5, i8* %p2, align 1		store i8 %t5, i8* %p2, align 1
store i8 %t4, i8* %p3, align 1		store i8 %t4, i8* %p3, align 1
store i8 %t2, i8* %p5, align 1		store i8 %t2, i8* %p5, align 1
store i8 %t1, i8* %p6, align 1		store i8 %t1, i8* %p6, align 1
store i8 %t0, i8* %p7, align 1		store i8 %t0, i8* %p7, align 1
ret void		ret void
}		}

		; Negative test - not consecutive addresses

define void @i32_to_i16_wrong_addr(i32 %x, i16* %p0) {		define void @i32_to_i16_wrong_addr(i32 %x, i16* %p0) {
; CHECK-LABEL: i32_to_i16_wrong_addr:		; CHECK-LABEL: i32_to_i16_wrong_addr:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: lsr w8, w0, #16		; CHECK-NEXT: lsr w8, w0, #16
; CHECK-NEXT: strh w8, [x1, #4]		; CHECK-NEXT: strh w8, [x1, #4]
; CHECK-NEXT: strh w0, [x1]		; CHECK-NEXT: strh w0, [x1]
; CHECK-NEXT: ret		; CHECK-NEXT: ret
%sh1 = lshr i32 %x, 16		%sh1 = lshr i32 %x, 16
%t0 = trunc i32 %x to i16		%t0 = trunc i32 %x to i16
%t1 = trunc i32 %sh1 to i16		%t1 = trunc i32 %sh1 to i16
%p2 = getelementptr inbounds i16, i16* %p0, i64 2		%p2 = getelementptr inbounds i16, i16* %p0, i64 2
store i16 %t1, i16* %p2, align 2		store i16 %t1, i16* %p2, align 2
store i16 %t0, i16* %p0, align 2		store i16 %t0, i16* %p0, align 2
ret void		ret void
}		}

		; Negative test - addresses don't line up with shift amounts

define void @i32_to_i8_wrong_order(i32 %x, i8* %p0) {		define void @i32_to_i8_wrong_order(i32 %x, i8* %p0) {
; CHECK-LABEL: i32_to_i8_wrong_order:		; CHECK-LABEL: i32_to_i8_wrong_order:
; CHECK: // %bb.0:		; CHECK: // %bb.0:
; CHECK-NEXT: lsr w8, w0, #8		; CHECK-NEXT: lsr w8, w0, #8
; CHECK-NEXT: lsr w9, w0, #16		; CHECK-NEXT: lsr w9, w0, #16
; CHECK-NEXT: lsr w10, w0, #24		; CHECK-NEXT: lsr w10, w0, #24
; CHECK-NEXT: strb w0, [x1, #3]		; CHECK-NEXT: strb w0, [x1, #3]
; CHECK-NEXT: strb w10, [x1, #1]		; CHECK-NEXT: strb w10, [x1, #1]
Show All 19 Lines

llvm/test/CodeGen/X86/stores-merging.ll

Show First 20 Lines • Show All 462 Lines • ▼ Show 20 Lines	; CHECK-NEXT: retq
%p3 = getelementptr inbounds i8, i8* %p, i64 3		%p3 = getelementptr inbounds i8, i8* %p, i64 3
store i8 %t4, i8* %p3, align 1		store i8 %t4, i8* %p3, align 1
ret void		ret void
}		}

define void @trunc_i32_to_i16(i32 %x, i16* %p) {		define void @trunc_i32_to_i16(i32 %x, i16* %p) {
; CHECK-LABEL: trunc_i32_to_i16:		; CHECK-LABEL: trunc_i32_to_i16:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movw %di, (%rsi)		; CHECK-NEXT: movl %edi, (%rsi)
; CHECK-NEXT: shrl $16, %edi
; CHECK-NEXT: movw %di, 2(%rsi)
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%t1 = trunc i32 %x to i16		%t1 = trunc i32 %x to i16
%sh = lshr i32 %x, 16		%sh = lshr i32 %x, 16
%t2 = trunc i32 %sh to i16		%t2 = trunc i32 %sh to i16
store i16 %t1, i16* %p, align 2		store i16 %t1, i16* %p, align 2
%p1 = getelementptr inbounds i16, i16* %p, i64 1		%p1 = getelementptr inbounds i16, i16* %p, i64 1
store i16 %t2, i16* %p1, align 2		store i16 %t2, i16* %p1, align 2
ret void		ret void
Show All 35 Lines	; CHECK-NEXT: retq
%p7 = getelementptr inbounds i8, i8* %p, i64 7		%p7 = getelementptr inbounds i8, i8* %p, i64 7
store i8 %t8, i8* %p7, align 1		store i8 %t8, i8* %p7, align 1
ret void		ret void
}		}

define void @trunc_i64_to_i16(i64 %x, i16* %p) {		define void @trunc_i64_to_i16(i64 %x, i16* %p) {
; CHECK-LABEL: trunc_i64_to_i16:		; CHECK-LABEL: trunc_i64_to_i16:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax		; CHECK-NEXT: movq %rdi, (%rsi)
; CHECK-NEXT: movq %rdi, %rcx
; CHECK-NEXT: movw %di, (%rsi)
; CHECK-NEXT: shrq $16, %rdi
; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: shrq $48, %rcx
; CHECK-NEXT: movw %di, 2(%rsi)
; CHECK-NEXT: movw %ax, 4(%rsi)
; CHECK-NEXT: movw %cx, 6(%rsi)
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%t1 = trunc i64 %x to i16		%t1 = trunc i64 %x to i16
%sh1 = lshr i64 %x, 16		%sh1 = lshr i64 %x, 16
%t2 = trunc i64 %sh1 to i16		%t2 = trunc i64 %sh1 to i16
%sh2 = lshr i64 %x, 32		%sh2 = lshr i64 %x, 32
%t3 = trunc i64 %sh2 to i16		%t3 = trunc i64 %sh2 to i16
%sh3 = lshr i64 %x, 48		%sh3 = lshr i64 %x, 48
%t4 = trunc i64 %sh3 to i16		%t4 = trunc i64 %sh3 to i16
store i16 %t1, i16* %p, align 2		store i16 %t1, i16* %p, align 2
%p1 = getelementptr inbounds i16, i16* %p, i64 1		%p1 = getelementptr inbounds i16, i16* %p, i64 1
store i16 %t2, i16* %p1, align 2		store i16 %t2, i16* %p1, align 2
%p2 = getelementptr inbounds i16, i16* %p, i64 2		%p2 = getelementptr inbounds i16, i16* %p, i64 2
store i16 %t3, i16* %p2, align 2		store i16 %t3, i16* %p2, align 2
%p3 = getelementptr inbounds i16, i16* %p, i64 3		%p3 = getelementptr inbounds i16, i16* %p, i64 3
store i16 %t4, i16* %p3, align 2		store i16 %t4, i16* %p3, align 2
ret void		ret void
}		}

define void @trunc_i64_to_i32(i64 %x, i32* %p) {		define void @trunc_i64_to_i32(i64 %x, i32* %p) {
; CHECK-LABEL: trunc_i64_to_i32:		; CHECK-LABEL: trunc_i64_to_i32:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, (%rsi)		; CHECK-NEXT: movq %rdi, (%rsi)
; CHECK-NEXT: shrq $32, %rdi
; CHECK-NEXT: movl %edi, 4(%rsi)
; CHECK-NEXT: retq		; CHECK-NEXT: retq
%t1 = trunc i64 %x to i32		%t1 = trunc i64 %x to i32
%sh = lshr i64 %x, 32		%sh = lshr i64 %x, 32
%t2 = trunc i64 %sh to i32		%t2 = trunc i64 %sh to i32
store i32 %t1, i32* %p, align 4		store i32 %t1, i32* %p, align 4
%p1 = getelementptr inbounds i32, i32* %p, i64 1		%p1 = getelementptr inbounds i32, i32* %p, i64 1
store i32 %t2, i32* %p1, align 4		store i32 %t2, i32* %p1, align 4
ret void		ret void
}		}