Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2497,6 +2497,13 @@
     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
   }
+  // Hoist one-use subtraction from non-opaque constant:
+  //   (C - x) + y  ->  (y - x) + C
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
+  }
 
   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
   // rather than 'add 0/-1' (the zext should get folded).
Index: llvm/trunk/test/CodeGen/AArch64/shift-amount-mod.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/shift-amount-mod.ll
+++ llvm/trunk/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -375,9 +375,7 @@
 define i32 @reg32_lshr_by_add_to_negated(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_add_to_negated:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w8, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
   %nega = sub i32 32, %a
@@ -388,9 +386,7 @@
 define i64 @reg64_lshr_by_add_to_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_add_to_negated:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64
-; CHECK-NEXT:    sub x8, x8, x1
-; CHECK-NEXT:    add x8, x8, x2
+; CHECK-NEXT:    sub x8, x2, x1
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
   %nega = sub i64 64, %a
Index: llvm/trunk/test/CodeGen/AArch64/sink-addsub-of-const.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ llvm/trunk/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -57,9 +57,8 @@
 define i32 @sink_sub_from_const_to_add0(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_from_const_to_add0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    add w0, w8, w1
+; CHECK-NEXT:    sub w8, w1, w0
+; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 32, %a
   %r = add i32 %t0, %b
@@ -68,9 +67,8 @@
 define i32 @sink_sub_from_const_to_add1(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_from_const_to_add1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    add w0, w1, w8
+; CHECK-NEXT:    sub w8, w1, w0
+; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 32, %a
   %r = add i32 %b, %t0
@@ -117,9 +115,8 @@
 define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_of_const_to_sub2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    add w0, w1, w8
+; CHECK-NEXT:    sub w8, w1, w0
+; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, 32
   %r = sub i32 %b, %t0
@@ -219,8 +216,8 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI16_0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
-; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
   %r = add <4 x i32> %t0, %b
@@ -231,8 +228,8 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI17_0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI17_0]
-; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
   %r = add <4 x i32> %b, %t0
@@ -287,8 +284,8 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI21_0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI21_0]
-; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
   %r = sub <4 x i32> %b, %t0
Index: llvm/trunk/test/CodeGen/ARM/addsubcarry-promotion.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/addsubcarry-promotion.ll
+++ llvm/trunk/test/CodeGen/ARM/addsubcarry-promotion.ll
@@ -10,40 +10,63 @@
 define void @fn1(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
 ; ARM-LABEL: fn1:
 ; ARM:       @ %bb.0: @ %entry
-; ARM-NEXT:    rsb r2, r2, #1
 ; ARM-NEXT:    adds r0, r1, r0
+; ARM-NEXT:    mov r3, #0
+; ARM-NEXT:    adc r0, r3, #0
 ; ARM-NEXT:    movw r1, #65535
-; ARM-NEXT:    sxth r2, r2
-; ARM-NEXT:    adc r0, r2, #0
-; ARM-NEXT:    tst r0, r1
+; ARM-NEXT:    sub r0, r0, r2
+; ARM-NEXT:    uxth r0, r0
+; ARM-NEXT:    cmp r0, r1
 ; ARM-NEXT:    bxeq lr
 ; ARM-NEXT:  .LBB0_1: @ %for.cond
 ; ARM-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; ARM-NEXT:    b .LBB0_1
 ;
-; THUMB1-LABEL: fn1:
-; THUMB1:       @ %bb.0: @ %entry
-; THUMB1-NEXT:    movs r3, #1
-; THUMB1-NEXT:    subs r2, r3, r2
-; THUMB1-NEXT:    sxth r2, r2
-; THUMB1-NEXT:    movs r3, #0
-; THUMB1-NEXT:    adds r0, r1, r0
-; THUMB1-NEXT:    adcs r3, r2
-; THUMB1-NEXT:    lsls r0, r3, #16
-; THUMB1-NEXT:    beq .LBB0_2
-; THUMB1-NEXT:  .LBB0_1: @ %for.cond
-; THUMB1-NEXT:    @ =>This Inner Loop Header: Depth=1
-; THUMB1-NEXT:    b .LBB0_1
-; THUMB1-NEXT:  .LBB0_2: @ %if.end
-; THUMB1-NEXT:    bx lr
+; THUMBV6M-LABEL: fn1:
+; THUMBV6M:       @ %bb.0: @ %entry
+; THUMBV6M-NEXT:    movs r3, #0
+; THUMBV6M-NEXT:    adds r0, r1, r0
+; THUMBV6M-NEXT:    adcs r3, r3
+; THUMBV6M-NEXT:    subs r0, r3, r2
+; THUMBV6M-NEXT:    uxth r0, r0
+; THUMBV6M-NEXT:    ldr r1, .LCPI0_0
+; THUMBV6M-NEXT:    cmp r0, r1
+; THUMBV6M-NEXT:    beq .LBB0_2
+; THUMBV6M-NEXT:  .LBB0_1: @ %for.cond
+; THUMBV6M-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMBV6M-NEXT:    b .LBB0_1
+; THUMBV6M-NEXT:  .LBB0_2: @ %if.end
+; THUMBV6M-NEXT:    bx lr
+; THUMBV6M-NEXT:    .p2align 2
+; THUMBV6M-NEXT:  @ %bb.3:
+; THUMBV6M-NEXT:  .LCPI0_0:
+; THUMBV6M-NEXT:    .long 65535 @ 0xffff
+;
+; THUMBV8M-BASE-LABEL: fn1:
+; THUMBV8M-BASE:       @ %bb.0: @ %entry
+; THUMBV8M-BASE-NEXT:    movs r3, #0
+; THUMBV8M-BASE-NEXT:    adds r0, r1, r0
+; THUMBV8M-BASE-NEXT:    adcs r3, r3
+; THUMBV8M-BASE-NEXT:    subs r0, r3, r2
+; THUMBV8M-BASE-NEXT:    uxth r0, r0
+; THUMBV8M-BASE-NEXT:    movw r1, #65535
+; THUMBV8M-BASE-NEXT:    cmp r0, r1
+; THUMBV8M-BASE-NEXT:    beq .LBB0_2
+; THUMBV8M-BASE-NEXT:  .LBB0_1: @ %for.cond
+; THUMBV8M-BASE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMBV8M-BASE-NEXT:    b .LBB0_1
+; THUMBV8M-BASE-NEXT:  .LBB0_2: @ %if.end
+; THUMBV8M-BASE-NEXT:    bx lr
 ;
 ; THUMB-LABEL: fn1:
 ; THUMB:       @ %bb.0: @ %entry
-; THUMB-NEXT:    rsb.w r2, r2, #1
 ; THUMB-NEXT:    adds r0, r0, r1
-; THUMB-NEXT:    sxth r2, r2
-; THUMB-NEXT:    adc r0, r2, #0
-; THUMB-NEXT:    lsls r0, r0, #16
+; THUMB-NEXT:    mov.w r3, #0
+; THUMB-NEXT:    adc r0, r3, #0
+; THUMB-NEXT:    movw r1, #65535
+; THUMB-NEXT:    subs r0, r0, r2
+; THUMB-NEXT:    uxth r0, r0
+; THUMB-NEXT:    cmp r0, r1
 ; THUMB-NEXT:    it eq
 ; THUMB-NEXT:    bxeq lr
 ; THUMB-NEXT:  .LBB0_1: @ %for.cond
Index: llvm/trunk/test/CodeGen/X86/shift-amount-mod.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/shift-amount-mod.ll
+++ llvm/trunk/test/CodeGen/X86/shift-amount-mod.ll
@@ -861,19 +861,17 @@
 ; X32-LABEL: reg32_lshr_by_add_to_negated:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl $32, %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X32-NEXT:    shrl %cl, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: reg32_lshr_by_add_to_negated:
 ; X64:       # %bb.0:
+; X64-NEXT:    movl %edx, %ecx
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl $32, %ecx
 ; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    addl %edx, %ecx
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
@@ -888,9 +886,9 @@
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl $64, %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    addb $64, %cl
 ; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl %cl, %edx
 ; X32-NEXT:    shrdl %cl, %esi, %eax
@@ -905,11 +903,10 @@
 ;
 ; X64-LABEL: reg64_lshr_by_add_to_negated:
 ; X64:       # %bb.0:
+; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movl $64, %ecx
 ; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    addl %edx, %ecx
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
   %nega = sub i64 64, %a
@@ -1278,11 +1275,10 @@
 ;
 ; X64-LABEL: reg64_lshr_by_negated_unfolded_add_b:
 ; X64:       # %bb.0:
+; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movl $64, %ecx
 ; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    addl %edx, %ecx
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
   %nega = sub i64 0, %a
Index: llvm/trunk/test/CodeGen/X86/sink-addsub-of-const.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/sink-addsub-of-const.ll
+++ llvm/trunk/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -94,16 +94,16 @@
 define i32 @sink_sub_from_const_to_add0(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_from_const_to_add0:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl $32, %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_add0:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    subl %edi, %eax
-; X64-NEXT:    addl %esi, %eax
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    subl %edi, %esi
+; X64-NEXT:    leal 32(%rsi), %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 32, %a
   %r = add i32 %t0, %b
@@ -112,16 +112,16 @@
 define i32 @sink_sub_from_const_to_add1(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_from_const_to_add1:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl $32, %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_add1:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    subl %edi, %eax
-; X64-NEXT:    addl %esi, %eax
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    subl %edi, %esi
+; X64-NEXT:    leal 32(%rsi), %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 32, %a
   %r = add i32 %b, %t0
@@ -192,16 +192,16 @@
 define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_of_const_to_sub2:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl $32, %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    subl %edi, %eax
-; X64-NEXT:    addl %esi, %eax
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    subl %edi, %esi
+; X64-NEXT:    leal 32(%rsi), %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, 32
   %r = sub i32 %b, %t0
@@ -329,25 +329,37 @@
 ; Outer 'add' is commutative - 2 variants.
 
 define <4 x i32> @vec_sink_sub_from_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: vec_sink_sub_from_const_to_add0:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movdqa {{.*#+}} xmm2 = <42,24,u,46>
-; ALL-NEXT:    psubd %xmm0, %xmm2
-; ALL-NEXT:    paddd %xmm1, %xmm2
-; ALL-NEXT:    movdqa %xmm2, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; X32-LABEL: vec_sink_sub_from_const_to_add0:
+; X32:       # %bb.0:
+; X32-NEXT:    psubd %xmm0, %xmm1
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X32-NEXT:    movdqa %xmm1, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: vec_sink_sub_from_const_to_add0:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
   %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
   %r = add <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
 define <4 x i32> @vec_sink_sub_from_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: vec_sink_sub_from_const_to_add1:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movdqa {{.*#+}} xmm2 = <42,24,u,46>
-; ALL-NEXT:    psubd %xmm0, %xmm2
-; ALL-NEXT:    paddd %xmm1, %xmm2
-; ALL-NEXT:    movdqa %xmm2, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; X32-LABEL: vec_sink_sub_from_const_to_add1:
+; X32:       # %bb.0:
+; X32-NEXT:    psubd %xmm0, %xmm1
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X32-NEXT:    movdqa %xmm1, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: vec_sink_sub_from_const_to_add1:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
   %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
   %r = add <4 x i32> %b, %t0
   ret <4 x i32> %r
@@ -411,13 +423,19 @@
   ret <4 x i32> %r
 }
 define <4 x i32> @vec_sink_sub_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: vec_sink_sub_of_const_to_sub2:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movdqa {{.*#+}} xmm2 = <42,24,u,46>
-; ALL-NEXT:    psubd %xmm0, %xmm2
-; ALL-NEXT:    paddd %xmm1, %xmm2
-; ALL-NEXT:    movdqa %xmm2, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; X32-LABEL: vec_sink_sub_of_const_to_sub2:
+; X32:       # %bb.0:
+; X32-NEXT:    psubd %xmm0, %xmm1
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X32-NEXT:    movdqa %xmm1, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: vec_sink_sub_of_const_to_sub2:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
   %r = sub <4 x i32> %b, %t0
   ret <4 x i32> %r
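
For reference, a minimal LLVM IR sketch of the fold this combine performs, (C - x) + y -> (y - x) + C; the function name and the constant 42 are illustrative only, not taken from the patch:

    define i32 @hoist_sub_from_const(i32 %x, i32 %y) {
      ; Before the combine: the constant C = 42 is the LHS of the inner sub,
      ; so it must be materialized into a register before the subtraction.
      %t = sub i32 42, %x   ; (C - x), single use, non-opaque constant
      %r = add i32 %t, %y   ; (C - x) + y
      ret i32 %r
    }

After the combine the same computation is emitted as (y - x) + 42: the subtraction becomes a plain register-register operation and the constant sinks into the outer add, where targets can fold it into an immediate form (e.g. `add w0, w8, #32` on AArch64 or `leal 32(%rsi), %eax` on x86-64 in the tests above) or drop it entirely when the result feeds a shift amount that is masked modulo the bit width, as in the shift-amount-mod tests.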