Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2957,6 +2957,13 @@ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1)); } + // (x - C) - y -> (x - y) - C + // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. + if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(1))) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1)); + } // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1' // rather than 'sub 0/1' (the sext should get folded). Index: llvm/trunk/test/CodeGen/AArch64/sink-addsub-of-const.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/sink-addsub-of-const.ll +++ llvm/trunk/test/CodeGen/AArch64/sink-addsub-of-const.ll @@ -314,8 +314,8 @@ ; CHECK-NEXT: adrp x8, .LCPI20_0 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI20_0] ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s ; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s +; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %b %t1 = sub <4 x i32> %t0, Index: llvm/trunk/test/CodeGen/X86/sink-addsub-of-const.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sink-addsub-of-const.ll +++ llvm/trunk/test/CodeGen/X86/sink-addsub-of-const.ll @@ -460,15 +460,15 @@ ; X32-LABEL: vec_sink_sub_of_const_to_sub: ; X32: # %bb.0: ; X32-NEXT: psubd %xmm1, %xmm0 -; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0 ; X32-NEXT: psubd %xmm2, %xmm0 +; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: vec_sink_sub_of_const_to_sub: ; X64: # %bb.0: ; X64-NEXT: psubd %xmm1, %xmm0 -; X64-NEXT: psubd {{.*}}(%rip), %xmm0 ; X64-NEXT: psubd %xmm2, %xmm0 +; X64-NEXT: psubd {{.*}}(%rip), %xmm0 ; X64-NEXT: retq %t0 = sub <4 x i32> %a, %b %t1 = sub <4 x i32> %t0,