Index: llvm/lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- llvm/lib/CodeGen/CodeGenPrepare.cpp +++ llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1181,10 +1181,14 @@ if (!MathDominates && !DT.dominates(Cmp, BO)) return false; - // Check that the insertion doesn't create a value that is live across more - // than two blocks, so to minimise the increase in register pressure. BasicBlock *MathBB = BO->getParent(), *CmpBB = Cmp->getParent(); if (MathBB != CmpBB) { + // Avoid hoisting an extra op into a dominating block and creating a + // potentially longer critical path. + if (!MathDominates) + return false; + // Check that the insertion doesn't create a value that is live across + // more than two blocks, so as to minimise the increase in register pressure. BasicBlock *Dominator = MathDominates ? MathBB : CmpBB; BasicBlock *Dominated = MathDominates ? CmpBB : MathBB; auto Successors = successors(Dominator); Index: llvm/test/CodeGen/X86/cgp-usubo.ll =================================================================== --- llvm/test/CodeGen/X86/cgp-usubo.ll +++ llvm/test/CodeGen/X86/cgp-usubo.ll @@ -211,16 +211,16 @@ ; CHECK-LABEL: PR41129: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: movq %rax, %rcx -; CHECK-NEXT: subq $1, %rcx -; CHECK-NEXT: jae .LBB10_1 -; CHECK-NEXT: # %bb.2: # %true -; CHECK-NEXT: movq %rcx, (%rdi) -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB10_1: # %false +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: je .LBB10_2 +; CHECK-NEXT: # %bb.1: # %false ; CHECK-NEXT: andl $7, %eax ; CHECK-NEXT: movq %rax, (%rdi) ; CHECK-NEXT: retq +; CHECK-NEXT: .LBB10_2: # %true +; CHECK-NEXT: decq %rax +; CHECK-NEXT: movq %rax, (%rdi) +; CHECK-NEXT: retq entry: %key = load i64, i64* %p64, align 8 %cond17 = icmp eq i64 %key, 0 Index: llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll =================================================================== --- llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ 
llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -16,11 +16,11 @@ ; GENERIC-NEXT: movl (%rdx), %eax ; GENERIC-NEXT: movl 4(%rdx), %ebx ; GENERIC-NEXT: decl %ecx -; GENERIC-NEXT: leaq 20(%rdx), %r11 +; GENERIC-NEXT: leaq 20(%rdx), %r14 ; GENERIC-NEXT: movq _Te0@{{.*}}(%rip), %r9 ; GENERIC-NEXT: movq _Te1@{{.*}}(%rip), %r8 ; GENERIC-NEXT: movq _Te3@{{.*}}(%rip), %r10 -; GENERIC-NEXT: movq %rcx, %r14 +; GENERIC-NEXT: movq %rcx, %r11 ; GENERIC-NEXT: jmp LBB0_1 ; GENERIC-NEXT: .p2align 4, 0x90 ; GENERIC-NEXT: LBB0_2: ## %bb1 @@ -29,13 +29,14 @@ ; GENERIC-NEXT: shrl $16, %ebx ; GENERIC-NEXT: movzbl %bl, %ebx ; GENERIC-NEXT: xorl (%r8,%rbx,4), %eax -; GENERIC-NEXT: xorl -4(%r11), %eax +; GENERIC-NEXT: xorl -4(%r14), %eax ; GENERIC-NEXT: shrl $24, %edi ; GENERIC-NEXT: movzbl %bpl, %ebx ; GENERIC-NEXT: movl (%r10,%rbx,4), %ebx ; GENERIC-NEXT: xorl (%r9,%rdi,4), %ebx -; GENERIC-NEXT: xorl (%r11), %ebx -; GENERIC-NEXT: addq $16, %r11 +; GENERIC-NEXT: xorl (%r14), %ebx +; GENERIC-NEXT: decq %r11 +; GENERIC-NEXT: addq $16, %r14 ; GENERIC-NEXT: LBB0_1: ## %bb ; GENERIC-NEXT: ## =>This Inner Loop Header: Depth=1 ; GENERIC-NEXT: movzbl %al, %edi @@ -46,16 +47,16 @@ ; GENERIC-NEXT: movzbl %bpl, %ebp ; GENERIC-NEXT: movl (%r8,%rbp,4), %ebp ; GENERIC-NEXT: xorl (%r9,%rax,4), %ebp -; GENERIC-NEXT: xorl -12(%r11), %ebp +; GENERIC-NEXT: xorl -12(%r14), %ebp ; GENERIC-NEXT: shrl $24, %ebx ; GENERIC-NEXT: movl (%r10,%rdi,4), %edi ; GENERIC-NEXT: xorl (%r9,%rbx,4), %edi -; GENERIC-NEXT: xorl -8(%r11), %edi +; GENERIC-NEXT: xorl -8(%r14), %edi ; GENERIC-NEXT: movl %ebp, %eax ; GENERIC-NEXT: shrl $24, %eax ; GENERIC-NEXT: movl (%r9,%rax,4), %eax -; GENERIC-NEXT: subq $1, %r14 -; GENERIC-NEXT: jae LBB0_2 +; GENERIC-NEXT: testq %r11, %r11 +; GENERIC-NEXT: jne LBB0_2 ; GENERIC-NEXT: ## %bb.3: ## %bb2 ; GENERIC-NEXT: shlq $4, %rcx ; GENERIC-NEXT: andl $-16777216, %eax ## imm = 0xFF000000 @@ -98,26 +99,27 @@ ; ATOM-NEXT: ## kill: def $ecx killed $ecx def $rcx ; ATOM-NEXT: movl (%rdx), %r15d ; 
ATOM-NEXT: movl 4(%rdx), %eax -; ATOM-NEXT: leaq 20(%rdx), %r11 +; ATOM-NEXT: leaq 20(%rdx), %r14 ; ATOM-NEXT: movq _Te0@{{.*}}(%rip), %r9 ; ATOM-NEXT: movq _Te1@{{.*}}(%rip), %r8 ; ATOM-NEXT: movq _Te3@{{.*}}(%rip), %r10 ; ATOM-NEXT: decl %ecx -; ATOM-NEXT: movq %rcx, %r14 +; ATOM-NEXT: movq %rcx, %r11 ; ATOM-NEXT: jmp LBB0_1 ; ATOM-NEXT: .p2align 4, 0x90 ; ATOM-NEXT: LBB0_2: ## %bb1 ; ATOM-NEXT: ## in Loop: Header=BB0_1 Depth=1 ; ATOM-NEXT: shrl $16, %eax ; ATOM-NEXT: shrl $24, %edi -; ATOM-NEXT: movzbl %al, %eax -; ATOM-NEXT: xorl (%r8,%rax,4), %r15d +; ATOM-NEXT: decq %r11 +; ATOM-NEXT: movzbl %al, %ebp ; ATOM-NEXT: movzbl %bl, %eax ; ATOM-NEXT: movl (%r10,%rax,4), %eax -; ATOM-NEXT: xorl -4(%r11), %r15d +; ATOM-NEXT: xorl (%r8,%rbp,4), %r15d ; ATOM-NEXT: xorl (%r9,%rdi,4), %eax -; ATOM-NEXT: xorl (%r11), %eax -; ATOM-NEXT: addq $16, %r11 +; ATOM-NEXT: xorl -4(%r14), %r15d +; ATOM-NEXT: xorl (%r14), %eax +; ATOM-NEXT: addq $16, %r14 ; ATOM-NEXT: LBB0_1: ## %bb ; ATOM-NEXT: ## =>This Inner Loop Header: Depth=1 ; ATOM-NEXT: movl %eax, %edi @@ -130,15 +132,15 @@ ; ATOM-NEXT: movzbl %r15b, %edi ; ATOM-NEXT: xorl (%r9,%rbp,4), %ebx ; ATOM-NEXT: movl (%r10,%rdi,4), %edi -; ATOM-NEXT: xorl -12(%r11), %ebx +; ATOM-NEXT: xorl -12(%r14), %ebx ; ATOM-NEXT: xorl (%r9,%rax,4), %edi ; ATOM-NEXT: movl %ebx, %eax -; ATOM-NEXT: xorl -8(%r11), %edi +; ATOM-NEXT: xorl -8(%r14), %edi ; ATOM-NEXT: shrl $24, %eax ; ATOM-NEXT: movl (%r9,%rax,4), %r15d -; ATOM-NEXT: subq $1, %r14 +; ATOM-NEXT: testq %r11, %r11 ; ATOM-NEXT: movl %edi, %eax -; ATOM-NEXT: jae LBB0_2 +; ATOM-NEXT: jne LBB0_2 ; ATOM-NEXT: ## %bb.3: ## %bb2 ; ATOM-NEXT: shrl $16, %eax ; ATOM-NEXT: shrl $8, %edi Index: llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll =================================================================== --- llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll +++ llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll @@ -475,23 +475,22 @@ ret i64 %sub } -; When 
the compare operand has uses besides add/sub, -; the transform may not be profitable. +; Avoid hoisting a math op into a dominating block which would +; increase the critical path. define void @PR41129(i64* %p64) { ; CHECK-LABEL: @PR41129( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[KEY:%.*]] = load i64, i64* [[P64:%.*]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[KEY]], i64 1) -; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0 -; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1 -; CHECK-NEXT: br i1 [[OV]], label [[TRUE:%.*]], label [[FALSE:%.*]] +; CHECK-NEXT: [[COND17:%.*]] = icmp eq i64 [[KEY]], 0 +; CHECK-NEXT: br i1 [[COND17]], label [[TRUE:%.*]], label [[FALSE:%.*]] ; CHECK: false: ; CHECK-NEXT: [[ANDVAL:%.*]] = and i64 [[KEY]], 7 ; CHECK-NEXT: store i64 [[ANDVAL]], i64* [[P64]] ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: true: -; CHECK-NEXT: store i64 [[MATH]], i64* [[P64]] +; CHECK-NEXT: [[SVALUE:%.*]] = add i64 [[KEY]], -1 +; CHECK-NEXT: store i64 [[SVALUE]], i64* [[P64]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void