Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17508,31 +17508,61 @@
   }
 
   // Check to see if this is an integer abs.
-  // select_cc setg[te] X,  0,  X, -X ->
-  // select_cc setgt    X, -1,  X, -X ->
-  // select_cc setl[te] X,  0, -X,  X ->
-  // select_cc setlt    X,  1, -X,  X ->
-  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+  // Note: there is no need to handle setge/setle here; they are
+  // canonicalized to setgt/setlt in canonicalizeCmpWithConstant().
+  //              CC  N0  N1C  N2  N3
+  // select_cc setgt   X,   0,  X,  Y ->
+  // select_cc setgt   X,  -1,  X,  Y ->
+  // select_cc setlt   X,   0,  Y,  X ->
+  // select_cc setlt   X,   1,  Y,  X ->
+  // fold to:
+  //   Z = sra (X, size(X)-1); xor (add (X, Z), Z)
+  // on condition:
+  //   Y = sub (0, X)
+  //   or X = sub (a, b) && Y = sub (b, a)
+  // or fold to:
+  //   Z = sra (Y, size(Y)-1); xor (add (Y, Z), Z)
+  // on condition:
+  //   X = sub (0, Y)
+
   if (N1C) {
-    ConstantSDNode *SubC = nullptr;
-    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
-         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
-        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
-      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
-    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
-              (N1C->isOne() && CC == ISD::SETLT)) &&
-             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
-      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
+    bool IntegerAbs = false;
+    bool N1CGtNegOne = (N1C->isNullValue() && CC == ISD::SETGT) ||
+                       (N1C->isAllOnesValue() && CC == ISD::SETGT);
+    bool N1CLtOne = (N1C->isNullValue() && CC == ISD::SETLT) ||
+                    (N1C->isOne() && CC == ISD::SETLT);
+    bool N2Op0Neg = false;
+    bool N3Op0Neg = false;
+    if (N2.getOpcode() == ISD::SUB) {
+      ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
+      N2Op0Neg = SubC && SubC->isNullValue();
+    }
+    if (N3.getOpcode() == ISD::SUB) {
+      ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
+      N3Op0Neg = SubC && SubC->isNullValue();
+    }
+    bool N3NegN2 = (N3Op0Neg && N2 == N3.getOperand(1)) ||
+                   (N2Op0Neg && N3 == N2.getOperand(1)) ||
+                   (N3.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
+                    N2.getOperand(0) == N3.getOperand(1) &&
+                    N2.getOperand(1) == N3.getOperand(0));
+
+    if (N3NegN2 && ((N1CGtNegOne && N0 == N2) || (N1CLtOne && N0 == N3)))
+      IntegerAbs = true;
 
     EVT XType = N0.getValueType();
-    if (SubC && SubC->isNullValue() && XType.isInteger()) {
-      SDLoc DL(N0);
+    if (IntegerAbs && XType.isInteger()) {
+      SDValue N = N0;
+      // If X = sub (0, Y), then abs(X) == abs(Y), so compute the abs on Y.
+      if ((N0 == N2 && N2Op0Neg) || (N0 == N3 && N3Op0Neg))
+        N = N0.getOperand(1);
+      SDLoc DL(N);
       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
-                                  N0,
+                                  N,
                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
-                                         getShiftAmountTy(N0.getValueType())));
+                                         getShiftAmountTy(N.getValueType())));
       SDValue Add = DAG.getNode(ISD::ADD, DL,
-                                XType, N0, Shift);
+                                XType, N, Shift);
       AddToWorklist(Shift.getNode());
       AddToWorklist(Add.getNode());
       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
Index: llvm/test/CodeGen/ARM/iabs.ll
===================================================================
--- llvm/test/CodeGen/ARM/iabs.ll
+++ llvm/test/CodeGen/ARM/iabs.ll
@@ -32,3 +32,13 @@
   %cond = select i1 %cmp, i32 %sub, i32 %sub1
   ret i32 %cond
 }
+
+define i32 @test3(i32 %a) {
+  %tmp1neg = sub i32 0, %a
+  %b = icmp sgt i32 %tmp1neg, -1
+  %abs = select i1 %b, i32 %tmp1neg, i32 %a
+  ret i32 %abs
+; CHECK: cmp
+; CHECK: rsbmi r0, r0, #0
+; CHECK: bx lr
+}
Index: llvm/test/CodeGen/ARM/iabs64.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/iabs64.ll
@@ -0,0 +1,32 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64 2>&1 | FileCheck %s
+define i64 @abs_expression64(i64 %a, i64 %b) {
+  %1 = sub nsw i64 %a, %b
+  %2 = icmp sgt i64 %1, 0
+  %3 = sub nsw i64 %b, %a
+  %4 = select i1 %2, i64 %1, i64 %3
+  ret i64 %4
+}
+
+; CHECK: subs
+; CHECK: cneg
+; CHECK: ret
+
+define i64 @test(i64 %a) {
+  %tmp1neg = sub i64 0, %a
+  %b = icmp sgt i64 %a, -1
+  %abs = select i1 %b, i64 %a, i64 %tmp1neg
+  ret i64 %abs
+}
+
+; CHECK: cneg
+; CHECK: ret
+
+define i64 @test2(i64 %a) {
+  %tmp1neg = sub i64 0, %a
+  %b = icmp sgt i64 %tmp1neg, -1
+  %abs = select i1 %b, i64 %tmp1neg, i64 %a
+  ret i64 %abs
+}
+
+; CHECK: cneg
+; CHECK: ret
Index: llvm/test/CodeGen/PowerPC/iabs.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/iabs.ll
+++ llvm/test/CodeGen/PowerPC/iabs.ll
@@ -1,6 +1,4 @@
-; REQUIRES: asserts
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -stats 2>&1 | \
-; RUN:   grep "4 .*Number of machine instrs printed"
+; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- 2>&1 | FileCheck %s
 
 ;; Integer absolute value, should produce something as good as:
 ;; srawi r2, r3, 31
@@ -14,3 +12,34 @@
   ret i32 %abs
 }
 
+; CHECK: srawi [[REG1:[0-9]+]], 3, 31
+; CHECK: add 3, 3, [[REG1]]
+; CHECK: xor 3, 3, [[REG1]]
+; CHECK: blr
+
+define i32 @test_abs(i32 %a) {
+  %tmp1neg = sub i32 0, %a
+  %b = icmp sgt i32 %tmp1neg, -1
+  %abs = select i1 %b, i32 %tmp1neg, i32 %a
+  ret i32 %abs
+}
+
+; CHECK: srawi [[REG1:[0-9]+]], 3, 31
+; CHECK: add 3, 3, [[REG1]]
+; CHECK: xor 3, 3, [[REG1]]
+; CHECK: blr
+
+define i32 @abs_expression(i32 %a, i32 %b) {
+  %1 = sub nsw i32 %a, %b
+  %2 = icmp sgt i32 %1, -1
+  %3 = sub nsw i32 %b, %a
+  %4 = select i1 %2, i32 %1, i32 %3
+  ret i32 %4
+}
+
+; CHECK: subf 3, 4, 3
+; CHECK: srawi 4, 3, 31
+; CHECK: add 3, 3, 4
+; CHECK: xor 3, 3, 4
+; CHECK: blr
+
Index: llvm/test/CodeGen/PowerPC/iabs64.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/iabs64.ll
@@ -0,0 +1,39 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
+
+define i64 @test(i64 %a) {
+  %tmp1neg = sub i64 0, %a
+  %b = icmp sgt i64 %a, -1
+  %abs = select i1 %b, i64 %a, i64 %tmp1neg
+  ret i64 %abs
+}
+
+; CHECK: sradi [[REG1:[0-9]+]], 3, 63
+; CHECK: add 3, 3, [[REG1]]
+; CHECK: xor 3, 3, [[REG1]]
+; CHECK: blr
+
+define i64 @test_abs(i64 %a) {
+  %tmp1neg = sub i64 0, %a
+  %b = icmp sgt i64 %tmp1neg, -1
+  %abs = select i1 %b, i64 %tmp1neg, i64 %a
+  ret i64 %abs
+}
+
+; CHECK: sradi [[REG1:[0-9]+]], 3, 63
+; CHECK: add 3, 3, [[REG1]]
+; CHECK: xor 3, 3, [[REG1]]
+; CHECK: blr
+
+define i64 @abs_expression64(i64 %a, i64 %b) {
+  %1 = sub nsw i64 %a, %b
+  %2 = icmp sgt i64 %1, 0
+  %3 = sub nsw i64 %b, %a
+  %4 = select i1 %2, i64 %1, i64 %3
+  ret i64 %4
+}
+
+; CHECK: sub 3, 3, 4
+; CHECK: sradi 4, 3, 63
+; CHECK: add 3, 3, 4
+; CHECK: xor 3, 3, 4
+; CHECK: blr
Index: llvm/test/CodeGen/X86/iabs.ll
===================================================================
--- llvm/test/CodeGen/X86/iabs.ll
+++ llvm/test/CodeGen/X86/iabs.ll
@@ -94,6 +94,36 @@
   ret i32 %abs
 }
 
+define i32 @test2_i32(i32 %a) nounwind {
+; X86-NO-CMOV-LABEL: test2_i32:
+; X86-NO-CMOV:       # %bb.0:
+; X86-NO-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-CMOV-NEXT:    movl %eax, %ecx
+; X86-NO-CMOV-NEXT:    sarl $31, %ecx
+; X86-NO-CMOV-NEXT:    addl %ecx, %eax
+; X86-NO-CMOV-NEXT:    xorl %ecx, %eax
+; X86-NO-CMOV-NEXT:    retl
+;
+; X86-CMOV-LABEL: test2_i32:
+; X86-CMOV:       # %bb.0:
+; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-CMOV-NEXT:    movl %ecx, %eax
+; X86-CMOV-NEXT:    negl %eax
+; X86-CMOV-NEXT:    cmovll %ecx, %eax
+; X86-CMOV-NEXT:    retl
+;
+; X64-LABEL: test2_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovll %edi, %eax
+; X64-NEXT:    retq
+  %tmp1neg = sub i32 0, %a
+  %b = icmp sgt i32 %tmp1neg, -1
+  %abs = select i1 %b, i32 %tmp1neg, i32 %a
+  ret i32 %abs
+}
+
 define i64 @test_i64(i64 %a) nounwind {
 ; X86-LABEL: test_i64:
 ; X86:       # %bb.0:
@@ -119,3 +149,91 @@
   ret i64 %abs
 }
 
+define i64 @test2_i64(i64 %a) nounwind {
+; X86-LABEL: test2_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    sarl $31, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    xorl %ecx, %edx
+; X86-NEXT:    xorl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: test2_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovlq %rdi, %rax
+; X64-NEXT:    retq
+  %tmp1neg = sub i64 0, %a
+  %b = icmp sgt i64 %tmp1neg, -1
+  %abs = select i1 %b, i64 %tmp1neg, i64 %a
+  ret i64 %abs
+}
+
+define i32 @abs_expression(i32 %a, i32 %b) {
+; X86-NO-CMOV-LABEL: abs_expression:
+; X86-NO-CMOV:       # %bb.0:
+; X86-NO-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-CMOV-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NO-CMOV-NEXT:    movl %eax, %ecx
+; X86-NO-CMOV-NEXT:    sarl $31, %ecx
+; X86-NO-CMOV-NEXT:    addl %ecx, %eax
+; X86-NO-CMOV-NEXT:    xorl %ecx, %eax
+; X86-NO-CMOV-NEXT:    retl
+;
+; X86-CMOV-LABEL: abs_expression:
+; X86-CMOV:       # %bb.0:
+; X86-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-CMOV-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-CMOV-NEXT:    movl %ecx, %eax
+; X86-CMOV-NEXT:    negl %eax
+; X86-CMOV-NEXT:    cmovll %ecx, %eax
+; X86-CMOV-NEXT:    retl
+;
+; X64-LABEL: abs_expression:
+; X64:       # %bb.0:
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    cmovll %edi, %eax
+; X64-NEXT:    retq
+  %1 = sub nsw i32 %a, %b
+  %2 = icmp sgt i32 %1, -1
+  %3 = sub nsw i32 %b, %a
+  %4 = select i1 %2, i32 %1, i32 %3
+  ret i32 %4
+}
+
+
+define i64 @abs_expression64(i64 %a, i64 %b) {
+; X86-LABEL: abs_expression64:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    sarl $31, %ecx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    xorl %ecx, %edx
+; X86-NEXT:    xorl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: abs_expression64:
+; X64:       # %bb.0:
+; X64-NEXT:    subq %rsi, %rdi
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovlq %rdi, %rax
+; X64-NEXT:    retq
+  %1 = sub nsw i64 %a, %b
+  %2 = icmp sgt i64 %1, 0
+  %3 = sub nsw i64 %b, %a
+  %4 = select i1 %2, i64 %1, i64 %3
+  ret i64 %4
+}
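
Reviewer note (not part of the patch): to sanity-check the shift/add/xor identity this combine emits, here is a minimal standalone C++ sketch. The file name and helper name are hypothetical; it assumes a two's-complement arithmetic right shift, which is what ISD::SRA provides at the DAG level and what mainstream C++ compilers produce for signed >>. INT32_MIN is excluded because its absolute value is not representable.

// abs_sketch.cpp (hypothetical) -- illustrates the fold:
//   Z = sra(X, bits-1);  abs(X) == xor(add(X, Z), Z)
#include <cassert>
#include <cstdint>
#include <cstdio>

// Branchless abs mirroring the DAG nodes built by the combine.
// For X >= 0, Z == 0 and the add/xor are no-ops; for X < 0, Z == -1,
// so (X + Z) ^ Z == ~(X - 1) == -X in two's complement.
static int32_t absViaSraAddXor(int32_t X) {
  int32_t Z = X >> 31; // arithmetic shift: 0 for X >= 0, -1 for X < 0
  return (X + Z) ^ Z;
}

int main() {
  const int32_t Samples[] = {0, 1, -1, 42, -42, INT32_MAX, INT32_MIN + 1};
  for (int32_t X : Samples)
    assert(absViaSraAddXor(X) == (X < 0 ? -X : X));
  std::printf("sra/add/xor abs identity holds on the sampled values\n");
  return 0;
}

Something like "clang++ -std=c++17 abs_sketch.cpp && ./a.out" should confirm the identity. The second pattern handled by the patch (N2 = sub(a, b), N3 = sub(b, a)) reduces to the same expression with X = a - b, and the new X = sub(0, Y) case is the same identity applied to Y, which is why the combine can build the shift/add/xor on Y directly.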