Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2544,6 +2544,21 @@
   if (N1.isUndef())
     return N1;
 
+  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
+  // XOR is commutative, so Y may appear as either operand of the xor.
+  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
+      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
+      SDValue S0 = N1.getOperand(0);
+      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
+        unsigned OpSizeInBits = VT.getScalarSizeInBits();
+        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+          if (C->getAPIntValue() == (OpSizeInBits - 1))
+            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+      }
+    }
+  }
+
   // If the relocation model supports it, consider symbol offsets.
   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
@@ -5649,13 +5664,19 @@
   }
 
   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
-  unsigned OpSizeInBits = VT.getScalarSizeInBits();
-  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
-      N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
-      TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
-    if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
-      if (C->getAPIntValue() == (OpSizeInBits - 1))
-        return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
+  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+    SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
+    SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
+    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
+      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
+      SDValue S0 = S.getOperand(0);
+      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
+        unsigned OpSizeInBits = VT.getScalarSizeInBits();
+        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
+          if (C->getAPIntValue() == (OpSizeInBits - 1))
+            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+      }
+    }
   }
 
   // fold (xor x, x) -> 0
Index: lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- lib/Target/ARM/ARMInstrNEON.td
+++ lib/Target/ARM/ARMInstrNEON.td
@@ -5391,6 +5391,13 @@
                  (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
           (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
 
+def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
+          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
+def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
+          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
+def : Pat<(v2i64 (abs (sub (zext (v2i32 DPR:$opA)), (zext (v2i32 DPR:$opB))))),
+          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
+
 // VABA : Vector Absolute Difference and Accumulate
 defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                           "vaba", "s", int_arm_neon_vabds, add>;
Index: lib/Target/Hexagon/HexagonISelLowering.cpp
===================================================================
--- lib/Target/Hexagon/HexagonISelLowering.cpp
+++ lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1327,6 +1327,9 @@
     setMinimumJumpTableEntries(std::numeric_limits::max());
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 
+  setOperationAction(ISD::ABS, MVT::i32, Legal);
+  setOperationAction(ISD::ABS, MVT::i64, Legal);
+
   // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
   // but they only operate on i64.
   for (MVT VT : MVT::integer_valuetypes()) {
Index: lib/Target/Hexagon/HexagonPatterns.td
===================================================================
--- lib/Target/Hexagon/HexagonPatterns.td
+++ lib/Target/Hexagon/HexagonPatterns.td
@@ -1175,22 +1175,6 @@
                  (i32 (LoReg $Rs)))>;
 }
 
-let AddedComplexity = 50 in
-multiclass Abs_pat {
-  // Let y = x >> 31 (for 32-bit), i.e. the sign bit repeated.
-  // abs(x) = (x + y) ^ y
-  def: Pat<(xor (add (sra RsPred:$Rs, (i32 Sh)), RsPred:$Rs),
-                (sra RsPred:$Rs, (i32 Sh))),
-           (MI RsPred:$Rs)>;
-  // abs(x) = (x ^ y) - y
-  def: Pat<(sub (xor RsPred:$Rs, (sra RsPred:$Rs, (i32 Sh))),
-                (sra RsPred:$Rs, (i32 Sh))),
-           (MI RsPred:$Rs)>;
-}
-
-defm: Abs_pat;
-defm: Abs_pat;
-
 def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>;
 def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>;
 def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>;
Index: test/CodeGen/Hexagon/abs.ll
===================================================================
--- test/CodeGen/Hexagon/abs.ll
+++ test/CodeGen/Hexagon/abs.ll
@@ -54,4 +54,30 @@
   ret i64 %v2
 }
 
-attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" }
+; CHECK-LABEL: f6:
+; CHECK: r[[R60:[0-9]+]] = abs(r0)
+; CHECK: r[[R61:[0-9]+]] = asr(r0,#31)
+; CHECK: r0 = addasl(r[[R61]],r[[R60]],#1)
+define i32 @f6(i32 %a0) #0 {
+  %v0 = ashr i32 %a0, 31
+  %v1 = add i32 %a0, %v0
+  %v2 = xor i32 %v0, %v1
+  %v3 = mul i32 %v2, 2
+  %v4 = add i32 %v0, %v3
+  ret i32 %v4
+}
+
+; CHECK-LABEL: f7:
+; CHECK: r[[R70:[0-9]+]] = abs(r0)
+; CHECK: r[[R71:[0-9]+]] = asr(r0,#31)
+; CHECK: r0 = addasl(r[[R71]],r[[R70]],#1)
+define i32 @f7(i32 %a0) #0 {
+  %v0 = ashr i32 %a0, 31
+  %v1 = add i32 %v0, %a0
+  %v2 = xor i32 %v0, %v1
+  %v3 = shl i32 %v2, 1
+  %v4 = add i32 %v0, %v3
+  ret i32 %v4
+}
+
+attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="-packets" }