Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2478,6 +2478,13 @@
     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
   }
+  // Hoist one-use subtraction from non-opaque constant:
+  //   (C - x) + y  ->  (y - x) + C
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
+  }

   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
   // rather than 'add 0/-1' (the zext should get folded).
Index: test/CodeGen/AArch64/shift-amount-mod.ll
===================================================================
--- test/CodeGen/AArch64/shift-amount-mod.ll
+++ test/CodeGen/AArch64/shift-amount-mod.ll
@@ -375,9 +375,7 @@
 define i32 @reg32_lshr_by_add_to_negated(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_add_to_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32
-; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: add w8, w8, w2
+; CHECK-NEXT: sub w8, w2, w1
 ; CHECK-NEXT: lsr w0, w0, w8
 ; CHECK-NEXT: ret
   %nega = sub i32 32, %a
@@ -388,9 +386,7 @@
 define i64 @reg64_lshr_by_add_to_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_add_to_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64
-; CHECK-NEXT: sub x8, x8, x1
-; CHECK-NEXT: add x8, x8, x2
+; CHECK-NEXT: sub x8, x2, x1
 ; CHECK-NEXT: lsr x0, x0, x8
 ; CHECK-NEXT: ret
   %nega = sub i64 64, %a
Index: test/CodeGen/AArch64/sink-addsub-of-const.ll
===================================================================
--- test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -66,9 +66,8 @@
 ; CHECK-LABEL: sink_sub_from_const_to_add0:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: mov w9, #32
-; CHECK-NEXT: sub w8, w9, w8
-; CHECK-NEXT: add w0, w8, w2
+; CHECK-NEXT: sub w8, w2, w8
+; CHECK-NEXT: add w0, w8, #32 // =32
 ; CHECK-NEXT: ret
   %t0 = add i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -79,9 +78,8 @@
 ; CHECK-LABEL: sink_sub_from_const_to_add1:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: mov w9, #32
-; CHECK-NEXT: sub w8, w9, w8
-; CHECK-NEXT: add w0, w2, w8
+; CHECK-NEXT: sub w8, w2, w8
+; CHECK-NEXT: add w0, w8, #32 // =32
 ; CHECK-NEXT: ret
   %t0 = add i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -136,7 +134,7 @@
 ; CHECK-LABEL: sink_sub_of_const_to_sub2:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: sub w8, w1, w0
-; CHECK-NEXT: add w8, w8, w2
+; CHECK-NEXT: add w8, w2, w8
 ; CHECK-NEXT: add w0, w8, #32 // =32
 ; CHECK-NEXT: ret
   %t0 = sub i32 %a, %b
@@ -250,8 +248,8 @@
 ; CHECK-NEXT: adrp x8, .LCPI16_0
 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: sub v0.4s, v3.4s, v0.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT: ret
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> , %t0
@@ -264,8 +262,8 @@
 ; CHECK-NEXT: adrp x8, .LCPI17_0
 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI17_0]
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: sub v0.4s, v3.4s, v0.4s
-; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT: ret
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> , %t0
@@ -328,7 +326,7 @@
 ; CHECK-NEXT: adrp x8, .LCPI21_0
 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI21_0]
 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT: ret
   %t0 = sub <4 x i32> %a, %b
Index: test/CodeGen/ARM/addsubcarry-promotion.ll
===================================================================
--- test/CodeGen/ARM/addsubcarry-promotion.ll
+++ test/CodeGen/ARM/addsubcarry-promotion.ll
@@ -10,40 +10,63 @@
 define void @fn1(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
 ; ARM-LABEL: fn1:
 ; ARM: @ %bb.0: @ %entry
-; ARM-NEXT: rsb r2, r2, #1
 ; ARM-NEXT: adds r0, r1, r0
+; ARM-NEXT: mov r3, #0
+; ARM-NEXT: adc r0, r3, #0
 ; ARM-NEXT: movw r1, #65535
-; ARM-NEXT: sxth r2, r2
-; ARM-NEXT: adc r0, r2, #0
-; ARM-NEXT: tst r0, r1
+; ARM-NEXT: sub r0, r0, r2
+; ARM-NEXT: uxth r0, r0
+; ARM-NEXT: cmp r0, r1
 ; ARM-NEXT: bxeq lr
 ; ARM-NEXT: .LBB0_1: @ %for.cond
 ; ARM-NEXT: @ =>This Inner Loop Header: Depth=1
 ; ARM-NEXT: b .LBB0_1
 ;
-; THUMB1-LABEL: fn1:
-; THUMB1: @ %bb.0: @ %entry
-; THUMB1-NEXT: movs r3, #1
-; THUMB1-NEXT: subs r2, r3, r2
-; THUMB1-NEXT: sxth r2, r2
-; THUMB1-NEXT: movs r3, #0
-; THUMB1-NEXT: adds r0, r1, r0
-; THUMB1-NEXT: adcs r3, r2
-; THUMB1-NEXT: lsls r0, r3, #16
-; THUMB1-NEXT: beq .LBB0_2
-; THUMB1-NEXT: .LBB0_1: @ %for.cond
-; THUMB1-NEXT: @ =>This Inner Loop Header: Depth=1
-; THUMB1-NEXT: b .LBB0_1
-; THUMB1-NEXT: .LBB0_2: @ %if.end
-; THUMB1-NEXT: bx lr
+; THUMBV6M-LABEL: fn1:
+; THUMBV6M: @ %bb.0: @ %entry
+; THUMBV6M-NEXT: movs r3, #0
+; THUMBV6M-NEXT: adds r0, r1, r0
+; THUMBV6M-NEXT: adcs r3, r3
+; THUMBV6M-NEXT: subs r0, r3, r2
+; THUMBV6M-NEXT: uxth r0, r0
+; THUMBV6M-NEXT: ldr r1, .LCPI0_0
+; THUMBV6M-NEXT: cmp r0, r1
+; THUMBV6M-NEXT: beq .LBB0_2
+; THUMBV6M-NEXT: .LBB0_1: @ %for.cond
+; THUMBV6M-NEXT: @ =>This Inner Loop Header: Depth=1
+; THUMBV6M-NEXT: b .LBB0_1
+; THUMBV6M-NEXT: .LBB0_2: @ %if.end
+; THUMBV6M-NEXT: bx lr
+; THUMBV6M-NEXT: .p2align 2
+; THUMBV6M-NEXT: @ %bb.3:
+; THUMBV6M-NEXT: .LCPI0_0:
+; THUMBV6M-NEXT: .long 65535 @ 0xffff
+;
+; THUMBV8M-BASE-LABEL: fn1:
+; THUMBV8M-BASE: @ %bb.0: @ %entry
+; THUMBV8M-BASE-NEXT: movs r3, #0
+; THUMBV8M-BASE-NEXT: adds r0, r1, r0
+; THUMBV8M-BASE-NEXT: adcs r3, r3
+; THUMBV8M-BASE-NEXT: subs r0, r3, r2
+; THUMBV8M-BASE-NEXT: uxth r0, r0
+; THUMBV8M-BASE-NEXT: movw r1, #65535
+; THUMBV8M-BASE-NEXT: cmp r0, r1
+; THUMBV8M-BASE-NEXT: beq .LBB0_2
+; THUMBV8M-BASE-NEXT: .LBB0_1: @ %for.cond
+; THUMBV8M-BASE-NEXT: @ =>This Inner Loop Header: Depth=1
+; THUMBV8M-BASE-NEXT: b .LBB0_1
+; THUMBV8M-BASE-NEXT: .LBB0_2: @ %if.end
+; THUMBV8M-BASE-NEXT: bx lr
 ;
 ; THUMB-LABEL: fn1:
 ; THUMB: @ %bb.0: @ %entry
-; THUMB-NEXT: rsb.w r2, r2, #1
 ; THUMB-NEXT: adds r0, r0, r1
-; THUMB-NEXT: sxth r2, r2
-; THUMB-NEXT: adc r0, r2, #0
-; THUMB-NEXT: lsls r0, r0, #16
+; THUMB-NEXT: mov.w r3, #0
+; THUMB-NEXT: adc r0, r3, #0
+; THUMB-NEXT: movw r1, #65535
+; THUMB-NEXT: subs r0, r0, r2
+; THUMB-NEXT: uxth r0, r0
+; THUMB-NEXT: cmp r0, r1
 ; THUMB-NEXT: it eq
 ; THUMB-NEXT: bxeq lr
 ; THUMB-NEXT: .LBB0_1: @ %for.cond
Index: test/CodeGen/X86/shift-amount-mod.ll
===================================================================
--- test/CodeGen/X86/shift-amount-mod.ll
+++ test/CodeGen/X86/shift-amount-mod.ll
@@ -861,19 +861,17 @@
 ; X32-LABEL: reg32_lshr_by_add_to_negated:
 ; X32: # %bb.0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl $32, %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X32-NEXT: shrl %cl, %eax
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: reg32_lshr_by_add_to_negated:
 ; X64: # %bb.0:
+; X64-NEXT: movl %edx, %ecx
 ; X64-NEXT: movl %edi, %eax
-; X64-NEXT: movl $32, %ecx
 ; X64-NEXT: subl %esi, %ecx
-; X64-NEXT: addl %edx, %ecx
 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT: shrl %cl, %eax
 ; X64-NEXT: retq
@@ -888,9 +886,9 @@
 ; X32-NEXT: pushl %esi
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl $64, %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: addb $64, %cl
 ; X32-NEXT: movl %esi, %edx
 ; X32-NEXT: shrl %cl, %edx
 ; X32-NEXT: shrdl %cl, %esi, %eax
@@ -905,11 +903,10 @@
 ;
 ; X64-LABEL: reg64_lshr_by_add_to_negated:
 ; X64: # %bb.0:
+; X64-NEXT: movq %rdx, %rcx
 ; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movl $64, %ecx
 ; X64-NEXT: subl %esi, %ecx
-; X64-NEXT: addl %edx, %ecx
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT: shrq %cl, %rax
 ; X64-NEXT: retq
   %nega = sub i64 64, %a
@@ -1278,11 +1275,10 @@
 ;
 ; X64-LABEL: reg64_lshr_by_negated_unfolded_add_b:
 ; X64: # %bb.0:
+; X64-NEXT: movq %rdx, %rcx
 ; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movl $64, %ecx
 ; X64-NEXT: subl %esi, %ecx
-; X64-NEXT: addl %edx, %ecx
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT: shrq %cl, %rax
 ; X64-NEXT: retq
   %nega = sub i64 0, %a
Index: test/CodeGen/X86/sink-addsub-of-const.ll
===================================================================
--- test/CodeGen/X86/sink-addsub-of-const.ll
+++ test/CodeGen/X86/sink-addsub-of-const.ll
@@ -106,19 +106,19 @@
 define i32 @sink_sub_from_const_to_add0(i32 %a, i32 %b, i32 %c) {
 ; X32-LABEL: sink_sub_from_const_to_add0:
 ; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl $32, %eax
 ; X32-NEXT: subl %ecx, %eax
-; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: addl $32, %eax
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_add0:
 ; X64: # %bb.0:
+; X64-NEXT: # kill: def $edx killed $edx def $rdx
 ; X64-NEXT: addl %esi, %edi
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: subl %edi, %eax
-; X64-NEXT: addl %edx, %eax
+; X64-NEXT: subl %edi, %edx
+; X64-NEXT: leal 32(%rdx), %eax
 ; X64-NEXT: retq
   %t0 = add i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -128,19 +128,19 @@
 define i32 @sink_sub_from_const_to_add1(i32 %a, i32 %b, i32 %c) {
 ; X32-LABEL: sink_sub_from_const_to_add1:
 ; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl $32, %eax
 ; X32-NEXT: subl %ecx, %eax
-; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: addl $32, %eax
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_add1:
 ; X64: # %bb.0:
+; X64-NEXT: # kill: def $edx killed $edx def $rdx
 ; X64-NEXT: addl %esi, %edi
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: subl %edi, %eax
-; X64-NEXT: addl %edx, %eax
+; X64-NEXT: subl %edi, %edx
+; X64-NEXT: leal 32(%rdx), %eax
 ; X64-NEXT: retq
   %t0 = add i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -234,7 +234,7 @@
 ; X64-NEXT: # kill: def $edx killed $edx def $rdx
 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
 ; X64-NEXT: subl %edi, %esi
-; X64-NEXT: leal 32(%rsi,%rdx), %eax
+; X64-NEXT: leal 32(%rdx,%rsi), %eax
 ; X64-NEXT: retq
   %t0 = sub i32 %a, %b
   %t1 = sub i32 %t0, 32
@@ -381,28 +381,42 @@
 ; Outer 'add' is commutative - 2 variants.

 define <4 x i32> @vec_sink_sub_from_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-; ALL-LABEL: vec_sink_sub_from_const_to_add0:
-; ALL: # %bb.0:
-; ALL-NEXT: paddd %xmm1, %xmm0
-; ALL-NEXT: movdqa {{.*#+}} xmm1 = <42,24,u,46>
-; ALL-NEXT: psubd %xmm0, %xmm1
-; ALL-NEXT: paddd %xmm2, %xmm1
-; ALL-NEXT: movdqa %xmm1, %xmm0
-; ALL-NEXT: ret{{[l|q]}}
+; X32-LABEL: vec_sink_sub_from_const_to_add0:
+; X32: # %bb.0:
+; X32-NEXT: paddd %xmm1, %xmm0
+; X32-NEXT: psubd %xmm0, %xmm2
+; X32-NEXT: paddd {{\.LCPI.*}}, %xmm2
+; X32-NEXT: movdqa %xmm2, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: vec_sink_sub_from_const_to_add0:
+; X64: # %bb.0:
+; X64-NEXT: paddd %xmm1, %xmm0
+; X64-NEXT: psubd %xmm0, %xmm2
+; X64-NEXT: paddd {{.*}}(%rip), %xmm2
+; X64-NEXT: movdqa %xmm2, %xmm0
+; X64-NEXT: retq
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> , %t0
   %r = add <4 x i32> %t1, %c
   ret <4 x i32> %r
 }
 define <4 x i32> @vec_sink_sub_from_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-; ALL-LABEL: vec_sink_sub_from_const_to_add1:
-; ALL: # %bb.0:
-; ALL-NEXT: paddd %xmm1, %xmm0
-; ALL-NEXT: movdqa {{.*#+}} xmm1 = <42,24,u,46>
-; ALL-NEXT: psubd %xmm0, %xmm1
-; ALL-NEXT: paddd %xmm2, %xmm1
-; ALL-NEXT: movdqa %xmm1, %xmm0
-; ALL-NEXT: ret{{[l|q]}}
+; X32-LABEL: vec_sink_sub_from_const_to_add1:
+; X32: # %bb.0:
+; X32-NEXT: paddd %xmm1, %xmm0
+; X32-NEXT: psubd %xmm0, %xmm2
+; X32-NEXT: paddd {{\.LCPI.*}}, %xmm2
+; X32-NEXT: movdqa %xmm2, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: vec_sink_sub_from_const_to_add1:
+; X64: # %bb.0:
+; X64-NEXT: paddd %xmm1, %xmm0
+; X64-NEXT: psubd %xmm0, %xmm2
+; X64-NEXT: paddd {{.*}}(%rip), %xmm2
+; X64-NEXT: movdqa %xmm2, %xmm0
+; X64-NEXT: retq
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> , %t0
   %r = add <4 x i32> %c, %t1
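
Illustrative sketch (not part of the patch): the new fold written at the LLVM IR level for readability. The combine itself runs on SelectionDAG nodes and only fires for a one-use SUB whose left operand is a non-opaque constant (the isConstantOrConstantVector guard above), but on the pattern exercised by sink_sub_from_const_to_add0 it amounts to the rewrite below. The function and value names here are hypothetical.

; Before the combine: the constant 32 is trapped inside the one-use
; subtraction, so it has to be materialized into a register first.
define i32 @cx_plus_y(i32 %x, i32 %y) {
  %neg = sub i32 32, %x        ; (C - x), single use
  %r = add i32 %neg, %y        ; (C - x) + y
  ret i32 %r
}

; After the combine: (y - x) + C keeps the constant as the outermost operand,
; where it can fold into an immediate-form add (AArch64 'add w0, w8, #32') or
; an x86 LEA displacement, as the updated CHECK lines above show.
define i32 @cx_plus_y_folded(i32 %x, i32 %y) {
  %s = sub i32 %y, %x          ; (y - x)
  %r = add i32 %s, 32          ; (y - x) + C
  ret i32 %r
}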