Skip to content

Commit 8ce1e3b

Browse files
committed Jun 7, 2017
[CGP] avoid zext/trunc of a memcmp expansion compare
This could be viewed as another shortcoming of the DAGCombiner: when both operands of a compare are zexted from the same source type, we should be able to compare the original types. The effect on PowerPC perf is likely unnoticeable, but there's a visible regression for x86 if we feed the suboptimal IR for memcmp expansion to the DAG:

_cmp_eq4_zexted_to_i64:
  movl (%rdi), %ecx
  movl (%rsi), %edx
  xorl %eax, %eax
  cmpq %rdx, %rcx
  sete %al

_cmp_eq4_better:
  movl (%rdi), %ecx
  xorl %eax, %eax
  cmpl (%rsi), %ecx
  sete %al

llvm-svn: 304923
1 parent 5a2f881 commit 8ce1e3b

File tree

2 files changed

+8
-8
lines changed

2 files changed

+8
-8
lines changed
 

‎llvm/lib/CodeGen/CodeGenPrepare.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -1841,11 +1841,11 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
18411841
// Load LoadSizeType from the base address.
18421842
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
18431843
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
1844-
if (LoadSizeType != MaxLoadType) {
1845-
LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);
1846-
LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType);
1847-
}
18481844
if (NumLoads != 1) {
1845+
if (LoadSizeType != MaxLoadType) {
1846+
LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);
1847+
LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType);
1848+
}
18491849
// If we have multiple loads per block, we need to generate a composite
18501850
// comparison using xor+or.
18511851
Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);

‎llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ define signext i32 @zeroEqualityTest02(i8* %x, i8* %y) {
2121
; CHECK-NEXT: lwz 3, 0(3)
2222
; CHECK-NEXT: lwz 4, 0(4)
2323
; CHECK-NEXT: li 5, 1
24-
; CHECK-NEXT: cmpld 3, 4
24+
; CHECK-NEXT: cmplw 3, 4
2525
; CHECK-NEXT: isel 3, 0, 5, 2
2626
; CHECK-NEXT: clrldi 3, 3, 32
2727
; CHECK-NEXT: blr
@@ -64,17 +64,17 @@ define signext i32 @zeroEqualityTest03(i8* %x, i8* %y) {
6464
; CHECK: # BB#0: # %loadbb
6565
; CHECK-NEXT: lwz 5, 0(3)
6666
; CHECK-NEXT: lwz 6, 0(4)
67-
; CHECK-NEXT: cmpld 5, 6
67+
; CHECK-NEXT: cmplw 5, 6
6868
; CHECK-NEXT: bne 0, .LBB2_3
6969
; CHECK-NEXT: # BB#1: # %loadbb1
7070
; CHECK-NEXT: lhz 5, 4(3)
7171
; CHECK-NEXT: lhz 6, 4(4)
72-
; CHECK-NEXT: cmpld 5, 6
72+
; CHECK-NEXT: cmplw 5, 6
7373
; CHECK-NEXT: bne 0, .LBB2_3
7474
; CHECK-NEXT: # BB#2: # %loadbb2
7575
; CHECK-NEXT: lbz 3, 6(3)
7676
; CHECK-NEXT: lbz 4, 6(4)
77-
; CHECK-NEXT: cmpld 3, 4
77+
; CHECK-NEXT: cmplw 3, 4
7878
; CHECK-NEXT: li 3, 0
7979
; CHECK-NEXT: beq 0, .LBB2_4
8080
; CHECK-NEXT: .LBB2_3: # %res_block

0 commit comments

Comments
 (0)
Please sign in to comment.