Page MenuHomePhabricator

D59602.diff
No OneTemporary

File Metadata

Created
Sun, Aug 18, 6:51 AM

D59602.diff

Index: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
@@ -1181,10 +1181,14 @@
if (!MathDominates && !DT.dominates(Cmp, BO))
return false;
- // Check that the insertion doesn't create a value that is live across more
- // than two blocks, so to minimise the increase in register pressure.
BasicBlock *MathBB = BO->getParent(), *CmpBB = Cmp->getParent();
if (MathBB != CmpBB) {
+ // Avoid hoisting an extra op into a dominating block and creating a
+ // potentially longer critical path.
+ if (!MathDominates)
+ return false;
+ // Check that the insertion doesn't create a value that is live across
+ // more than two blocks, to minimise the increase in register pressure.
BasicBlock *Dominator = MathDominates ? MathBB : CmpBB;
BasicBlock *Dominated = MathDominates ? CmpBB : MathBB;
auto Successors = successors(Dominator);
Index: llvm/trunk/test/CodeGen/X86/cgp-usubo.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/cgp-usubo.ll
+++ llvm/trunk/test/CodeGen/X86/cgp-usubo.ll
@@ -211,16 +211,16 @@
; CHECK-LABEL: PR41129:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: subq $1, %rcx
-; CHECK-NEXT: jae .LBB10_1
-; CHECK-NEXT: # %bb.2: # %true
-; CHECK-NEXT: movq %rcx, (%rdi)
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB10_1: # %false
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: je .LBB10_2
+; CHECK-NEXT: # %bb.1: # %false
; CHECK-NEXT: andl $7, %eax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB10_2: # %true
+; CHECK-NEXT: decq %rax
+; CHECK-NEXT: movq %rax, (%rdi)
+; CHECK-NEXT: retq
entry:
%key = load i64, i64* %p64, align 8
%cond17 = icmp eq i64 %key, 0
Index: llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -16,11 +16,11 @@
; GENERIC-NEXT: movl (%rdx), %eax
; GENERIC-NEXT: movl 4(%rdx), %ebx
; GENERIC-NEXT: decl %ecx
-; GENERIC-NEXT: leaq 20(%rdx), %r11
+; GENERIC-NEXT: leaq 20(%rdx), %r14
; GENERIC-NEXT: movq _Te0@{{.*}}(%rip), %r9
; GENERIC-NEXT: movq _Te1@{{.*}}(%rip), %r8
; GENERIC-NEXT: movq _Te3@{{.*}}(%rip), %r10
-; GENERIC-NEXT: movq %rcx, %r14
+; GENERIC-NEXT: movq %rcx, %r11
; GENERIC-NEXT: jmp LBB0_1
; GENERIC-NEXT: .p2align 4, 0x90
; GENERIC-NEXT: LBB0_2: ## %bb1
@@ -29,13 +29,14 @@
; GENERIC-NEXT: shrl $16, %ebx
; GENERIC-NEXT: movzbl %bl, %ebx
; GENERIC-NEXT: xorl (%r8,%rbx,4), %eax
-; GENERIC-NEXT: xorl -4(%r11), %eax
+; GENERIC-NEXT: xorl -4(%r14), %eax
; GENERIC-NEXT: shrl $24, %edi
; GENERIC-NEXT: movzbl %bpl, %ebx
; GENERIC-NEXT: movl (%r10,%rbx,4), %ebx
; GENERIC-NEXT: xorl (%r9,%rdi,4), %ebx
-; GENERIC-NEXT: xorl (%r11), %ebx
-; GENERIC-NEXT: addq $16, %r11
+; GENERIC-NEXT: xorl (%r14), %ebx
+; GENERIC-NEXT: decq %r11
+; GENERIC-NEXT: addq $16, %r14
; GENERIC-NEXT: LBB0_1: ## %bb
; GENERIC-NEXT: ## =>This Inner Loop Header: Depth=1
; GENERIC-NEXT: movzbl %al, %edi
@@ -46,16 +47,16 @@
; GENERIC-NEXT: movzbl %bpl, %ebp
; GENERIC-NEXT: movl (%r8,%rbp,4), %ebp
; GENERIC-NEXT: xorl (%r9,%rax,4), %ebp
-; GENERIC-NEXT: xorl -12(%r11), %ebp
+; GENERIC-NEXT: xorl -12(%r14), %ebp
; GENERIC-NEXT: shrl $24, %ebx
; GENERIC-NEXT: movl (%r10,%rdi,4), %edi
; GENERIC-NEXT: xorl (%r9,%rbx,4), %edi
-; GENERIC-NEXT: xorl -8(%r11), %edi
+; GENERIC-NEXT: xorl -8(%r14), %edi
; GENERIC-NEXT: movl %ebp, %eax
; GENERIC-NEXT: shrl $24, %eax
; GENERIC-NEXT: movl (%r9,%rax,4), %eax
-; GENERIC-NEXT: subq $1, %r14
-; GENERIC-NEXT: jae LBB0_2
+; GENERIC-NEXT: testq %r11, %r11
+; GENERIC-NEXT: jne LBB0_2
; GENERIC-NEXT: ## %bb.3: ## %bb2
; GENERIC-NEXT: shlq $4, %rcx
; GENERIC-NEXT: andl $-16777216, %eax ## imm = 0xFF000000
@@ -98,26 +99,27 @@
; ATOM-NEXT: ## kill: def $ecx killed $ecx def $rcx
; ATOM-NEXT: movl (%rdx), %r15d
; ATOM-NEXT: movl 4(%rdx), %eax
-; ATOM-NEXT: leaq 20(%rdx), %r11
+; ATOM-NEXT: leaq 20(%rdx), %r14
; ATOM-NEXT: movq _Te0@{{.*}}(%rip), %r9
; ATOM-NEXT: movq _Te1@{{.*}}(%rip), %r8
; ATOM-NEXT: movq _Te3@{{.*}}(%rip), %r10
; ATOM-NEXT: decl %ecx
-; ATOM-NEXT: movq %rcx, %r14
+; ATOM-NEXT: movq %rcx, %r11
; ATOM-NEXT: jmp LBB0_1
; ATOM-NEXT: .p2align 4, 0x90
; ATOM-NEXT: LBB0_2: ## %bb1
; ATOM-NEXT: ## in Loop: Header=BB0_1 Depth=1
; ATOM-NEXT: shrl $16, %eax
; ATOM-NEXT: shrl $24, %edi
-; ATOM-NEXT: movzbl %al, %eax
-; ATOM-NEXT: xorl (%r8,%rax,4), %r15d
+; ATOM-NEXT: decq %r11
+; ATOM-NEXT: movzbl %al, %ebp
; ATOM-NEXT: movzbl %bl, %eax
; ATOM-NEXT: movl (%r10,%rax,4), %eax
-; ATOM-NEXT: xorl -4(%r11), %r15d
+; ATOM-NEXT: xorl (%r8,%rbp,4), %r15d
; ATOM-NEXT: xorl (%r9,%rdi,4), %eax
-; ATOM-NEXT: xorl (%r11), %eax
-; ATOM-NEXT: addq $16, %r11
+; ATOM-NEXT: xorl -4(%r14), %r15d
+; ATOM-NEXT: xorl (%r14), %eax
+; ATOM-NEXT: addq $16, %r14
; ATOM-NEXT: LBB0_1: ## %bb
; ATOM-NEXT: ## =>This Inner Loop Header: Depth=1
; ATOM-NEXT: movl %eax, %edi
@@ -130,15 +132,15 @@
; ATOM-NEXT: movzbl %r15b, %edi
; ATOM-NEXT: xorl (%r9,%rbp,4), %ebx
; ATOM-NEXT: movl (%r10,%rdi,4), %edi
-; ATOM-NEXT: xorl -12(%r11), %ebx
+; ATOM-NEXT: xorl -12(%r14), %ebx
; ATOM-NEXT: xorl (%r9,%rax,4), %edi
; ATOM-NEXT: movl %ebx, %eax
-; ATOM-NEXT: xorl -8(%r11), %edi
+; ATOM-NEXT: xorl -8(%r14), %edi
; ATOM-NEXT: shrl $24, %eax
; ATOM-NEXT: movl (%r9,%rax,4), %r15d
-; ATOM-NEXT: subq $1, %r14
+; ATOM-NEXT: testq %r11, %r11
; ATOM-NEXT: movl %edi, %eax
-; ATOM-NEXT: jae LBB0_2
+; ATOM-NEXT: jne LBB0_2
; ATOM-NEXT: ## %bb.3: ## %bb2
; ATOM-NEXT: shrl $16, %eax
; ATOM-NEXT: shrl $8, %edi
Index: llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
===================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
@@ -475,23 +475,22 @@
ret i64 %sub
}
-; When the compare operand has uses besides add/sub,
-; the transform may not be profitable.
+; Avoid hoisting a math op into a dominating block which would
+; increase the critical path.
define void @PR41129(i64* %p64) {
; CHECK-LABEL: @PR41129(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[KEY:%.*]] = load i64, i64* [[P64:%.*]], align 8
-; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[KEY]], i64 1)
-; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
-; CHECK-NEXT: br i1 [[OV]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK-NEXT: [[COND17:%.*]] = icmp eq i64 [[KEY]], 0
+; CHECK-NEXT: br i1 [[COND17]], label [[TRUE:%.*]], label [[FALSE:%.*]]
; CHECK: false:
; CHECK-NEXT: [[ANDVAL:%.*]] = and i64 [[KEY]], 7
; CHECK-NEXT: store i64 [[ANDVAL]], i64* [[P64]]
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: true:
-; CHECK-NEXT: store i64 [[MATH]], i64* [[P64]]
+; CHECK-NEXT: [[SVALUE:%.*]] = add i64 [[KEY]], -1
+; CHECK-NEXT: store i64 [[SVALUE]], i64* [[P64]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void

Event Timeline