diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9646,6 +9646,30 @@ return SimplifySelect(DL, N0, N1, N2); } + if (N1.getOpcode() == N2.getOpcode() && TLI.isBinOp(N1.getOpcode()) && + N->isOnlyUserOf(N0.getNode()) && N->isOnlyUserOf(N1.getNode())) { + // Fold select(cond, binop(x, y), binop(z, y)) + // --> binop(select(cond, x, z), y) + if (N1.getOperand(1) == N2.getOperand(1)) { + SDValue NewSel = + DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0)); + return DAG.getNode(N1.getOpcode(), DL, VT, NewSel, N1.getOperand(1)); + } + + // Fold select(cond, binop(x, y), binop(x, z)) + // --> binop(x, select(cond, y, z)) + // Second op VT might be different (e.g. shift amount type) + if (N1.getOperand(0) == N2.getOperand(0) && + VT == N1.getOperand(1).getValueType() && + VT == N2.getOperand(1).getValueType()) { + SDValue NewSel = + DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1)); + return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0), NewSel); + } + + // TODO: Handle isCommutativeBinOp as well ? + } + return SDValue(); } diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll --- a/llvm/test/CodeGen/NVPTX/fast-math.ll +++ b/llvm/test/CodeGen/NVPTX/fast-math.ll @@ -146,10 +146,8 @@ ; CHECK-LABEL: repeated_div_recip_allowed define float @repeated_div_recip_allowed(i1 %pred, float %a, float %b, float %divisor) { -; CHECK: rcp.rn.f32 -; CHECK: mul.rn.f32 -; CHECK: mul.rn.f32 ; CHECK: selp.f32 +; CHECK: div.rn.f32 %x = fdiv arcp float %a, %divisor %y = fdiv arcp float %b, %divisor %z = select i1 %pred, float %x, float %y @@ -158,10 +156,8 @@ ; CHECK-LABEL: repeated_div_recip_allowed_ftz define float @repeated_div_recip_allowed_ftz(i1 %pred, float %a, float %b, float %divisor) #1 { -; CHECK: rcp.rn.ftz.f32 -; CHECK: mul.rn.ftz.f32 -; CHECK: mul.rn.ftz.f32 ; CHECK: selp.f32 +; CHECK: div.rn.ftz.f32 %x = fdiv arcp float %a, %divisor %y = fdiv arcp float %b, %divisor %z = select i1 %pred, float %x, float %y @@ -170,10 +166,8 @@ ; CHECK-LABEL: repeated_div_fast define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 { -; CHECK: rcp.approx.f32 -; CHECK: mul.f32 -; CHECK: mul.f32 ; CHECK: selp.f32 +; CHECK: div.approx.f32 %x = fdiv float %a, %divisor %y = fdiv float %b, %divisor %z = select i1 %pred, float %x, float %y @@ -182,10 +176,8 @@ ; CHECK-LABEL: repeated_div_fast_ftz define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 { -; CHECK: rcp.approx.ftz.f32 -; CHECK: mul.ftz.f32 -; CHECK: mul.ftz.f32 ; CHECK: selp.f32 +; CHECK: div.approx.ftz.f32 %x = fdiv float %a, %divisor %y = fdiv float %b, %divisor %z = select i1 %pred, float %x, float %y diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -8,37 +8,40 @@ %0 = type { i64, i32 } define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind { -; CHECK-LABEL: test1: -; CHECK: ## %bb.0: -; CHECK-NEXT: addq $8, %rdi -; CHECK-NEXT: addq $8, %rsi -; CHECK-NEXT: testb $1, %dl -; CHECK-NEXT: cmovneq %rdi, %rsi -; CHECK-NEXT: movl (%rsi), %eax -; CHECK-NEXT: retq +; GENERIC-LABEL: test1: +; GENERIC: ## %bb.0: +; GENERIC-NEXT: testb $1, %dl +; GENERIC-NEXT: cmoveq %rsi, %rdi +; GENERIC-NEXT: movl 8(%rdi), %eax +; GENERIC-NEXT: retq +; +; ATOM-LABEL: test1: +; ATOM: ## %bb.0: +; ATOM-NEXT: testb $1, %dl +; ATOM-NEXT: cmoveq %rsi, %rdi +; ATOM-NEXT: movl 8(%rdi), %eax +; ATOM-NEXT: nop +; ATOM-NEXT: nop +; ATOM-NEXT: retq ; ; ATHLON-LABEL: test1: ; ATHLON: ## %bb.0: -; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax -; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx -; ATHLON-NEXT: addl $8, %ecx -; ATHLON-NEXT: addl $8, %eax ; ATHLON-NEXT: testb $1, {{[0-9]+}}(%esp) -; ATHLON-NEXT: cmovnel %ecx, %eax -; ATHLON-NEXT: movl (%eax), %eax +; ATHLON-NEXT: leal {{[0-9]+}}(%esp), %eax +; ATHLON-NEXT: leal {{[0-9]+}}(%esp), %ecx +; ATHLON-NEXT: cmovnel %eax, %ecx +; ATHLON-NEXT: movl (%ecx), %eax +; ATHLON-NEXT: movl 8(%eax), %eax ; ATHLON-NEXT: retl ; ; MCU-LABEL: test1: ; MCU: # %bb.0: ; MCU-NEXT: testb $1, %cl -; MCU-NEXT: jne .LBB0_1 -; MCU-NEXT: # %bb.2: -; MCU-NEXT: addl $8, %edx -; MCU-NEXT: movl (%edx), %eax -; MCU-NEXT: retl -; MCU-NEXT: .LBB0_1: -; MCU-NEXT: addl $8, %eax -; MCU-NEXT: movl (%eax), %eax +; MCU-NEXT: jne .LBB0_2 +; MCU-NEXT: # %bb.1: +; MCU-NEXT: movl %edx, %eax +; MCU-NEXT: .LBB0_2: +; MCU-NEXT: movl 8(%eax), %eax ; MCU-NEXT: retl %t0 = load %0, %0* %p %t1 = load %0, %0* %q