Skip to content

Commit 4e6dcf7

Browse files
committed Jul 23, 2017
[DAG] Fix typo preventing some store merges to truncated stores.
Check the actual memory type stored, not the extended value size, when considering whether a truncated store merge is worthwhile.
Reviewers: efriedma, RKSimon, spatel, jyknight
Reviewed By: efriedma
Subscribers: llvm-commits, nhaehnle
Differential Revision: https://reviews.llvm.org/D35623
llvm-svn: 308833
1 parent 6876680 commit 4e6dcf7

File tree

3 files changed

+9
-12
lines changed

3 files changed

+9
-12
lines changed
 

‎llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -12866,8 +12866,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
1286612866
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
1286712867
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
1286812868
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
12869-
TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12870-
FirstStoreAS, FirstStoreAlign, &IsFast) &&
12869+
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
12870+
FirstStoreAlign, &IsFast) &&
1287112871
IsFast) {
1287212872
LastIntegerTrunc = true;
1287312873
LastLegalType = i + 1;
@@ -13098,8 +13098,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
1309813098
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
1309913099
StoreTy) &&
1310013100
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
13101-
TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
13102-
FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
13101+
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
13102+
FirstStoreAlign, &IsFastSt) &&
1310313103
IsFastSt &&
1310413104
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
1310513105
FirstLoadAlign, &IsFastLd) &&

‎llvm/test/CodeGen/AMDGPU/merge-stores.ll

+2-4
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010

1111

1212
; GCN-LABEL: {{^}}merge_global_store_2_constants_i8:
13-
; GCN: buffer_store_byte
14-
; GCN: buffer_store_byte
13+
; GCN: buffer_store_short
1514
; GCN: s_endpgm
1615
define amdgpu_kernel void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 {
1716
%out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
@@ -489,8 +488,7 @@ define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(i32 addr
489488
}
490489

491490
; GCN-LABEL: {{^}}merge_local_store_2_constants_i8:
492-
; GCN: ds_write_b8
493-
; GCN: ds_write_b8
491+
; GCN: ds_write_b16
494492
; GCN: s_endpgm
495493
define amdgpu_kernel void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 {
496494
%out.gep.1 = getelementptr i8, i8 addrspace(3)* %out, i32 1

‎llvm/test/CodeGen/BPF/undef.ll

+3-4
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,9 @@ define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 s
2323
; EL: r1 = 134678021
2424
; EB: r1 = 84281096
2525
; CHECK: *(u32 *)(r10 - 8) = r1
26-
; CHECK: r1 = 9
27-
; CHECK: *(u8 *)(r10 - 4) = r1
28-
; CHECK: r1 = 10
29-
; CHECK: *(u8 *)(r10 - 3) = r1
26+
; EL: r1 = 2569
27+
; EB: r1 = 2314
28+
; CHECK: *(u16 *)(r10 - 4) = r1
3029
; CHECK: *(u16 *)(r10 + 24) = r2
3130
; CHECK: *(u16 *)(r10 + 22) = r2
3231
; CHECK: *(u16 *)(r10 + 20) = r2

0 commit comments

Comments
 (0)
Please sign in to comment.