Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2545,6 +2545,20 @@ if (N1.isUndef()) return N1; + // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X) + if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { + if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) { + SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1); + SDValue S0 = N1.getOperand(0); + if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) { + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) + if (C->getAPIntValue() == (OpSizeInBits - 1)) + return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); + } + } + } + + // If the relocation model supports it, consider symbol offsets. if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { @@ -5650,13 +5664,19 @@ } // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) - unsigned OpSizeInBits = VT.getScalarSizeInBits(); - if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 && - N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) && - TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { - if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) - if (C->getAPIntValue() == (OpSizeInBits - 1)) - return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0)); + if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { + SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1; + SDValue S = N0.getOpcode() == ISD::SRA ? 
N0 : N1; + if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) { + SDValue A0 = A.getOperand(0), A1 = A.getOperand(1); + SDValue S0 = S.getOperand(0); + if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) { + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1))) + if (C->getAPIntValue() == (OpSizeInBits - 1)) + return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); + } + } } // fold (xor x, x) -> 0 Index: lib/Target/ARM/ARMInstrNEON.td =================================================================== --- lib/Target/ARM/ARMInstrNEON.td +++ lib/Target/ARM/ARMInstrNEON.td @@ -5392,23 +5392,19 @@ defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, "vabdl", "u", int_arm_neon_vabdu, zext, 1>; +def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))), + (VABDLuv8i16 DPR:$opA, DPR:$opB)>; +def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))), + (VABDLuv4i32 DPR:$opA, DPR:$opB)>; + +// ISD::ABS is not legal for v2i64, so VABD needs to be matched from the +// shift/xor pattern for ABS. 
+ def abd_shr : PatFrag<(ops node:$in1, node:$in2, node:$shift), (NEONvshrs (sub (zext node:$in1), (zext node:$in2)), (i32 $shift))>; -def : Pat<(xor (v4i32 (bitconvert (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15)))), - (v4i32 (bitconvert (v8i16 (add (sub (zext (v8i8 DPR:$opA)), - (zext (v8i8 DPR:$opB))), - (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15))))))), - (VABDLuv8i16 DPR:$opA, DPR:$opB)>; - -def : Pat<(xor (v4i32 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)), - (v4i32 (add (sub (zext (v4i16 DPR:$opA)), - (zext (v4i16 DPR:$opB))), - (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)))), - (VABDLuv4i32 DPR:$opA, DPR:$opB)>; - def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))), (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)), (zext (v2i32 DPR:$opB))), Index: lib/Target/Hexagon/HexagonISelLowering.cpp =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.cpp +++ lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1327,6 +1327,9 @@ setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max()); setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::ABS, MVT::i32, Legal); + setOperationAction(ISD::ABS, MVT::i64, Legal); + // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit, // but they only operate on i64. for (MVT VT : MVT::integer_valuetypes()) { Index: lib/Target/Hexagon/HexagonPatterns.td =================================================================== --- lib/Target/Hexagon/HexagonPatterns.td +++ lib/Target/Hexagon/HexagonPatterns.td @@ -1175,22 +1175,6 @@ (i32 (LoReg $Rs)))>; } -let AddedComplexity = 50 in -multiclass Abs_pat<InstHexagon MI, PatLeaf RsPred, int Sh> { - // Let y = x >> 31 (for 32-bit), i.e. the sign bit repeated. 
- // abs(x) = (x + y) ^ y - def: Pat<(xor (add (sra RsPred:$Rs, (i32 Sh)), RsPred:$Rs), - (sra RsPred:$Rs, (i32 Sh))), - (MI RsPred:$Rs)>; - // abs(x) = (x ^ y) - y - def: Pat<(sub (xor RsPred:$Rs, (sra RsPred:$Rs, (i32 Sh))), - (sra RsPred:$Rs, (i32 Sh))), - (MI RsPred:$Rs)>; -} - -defm: Abs_pat<A2_abs, I32, 31>; -defm: Abs_pat<A2_absp, I64, 63>; - def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>; def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>; def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>; Index: test/CodeGen/Hexagon/abs.ll =================================================================== --- test/CodeGen/Hexagon/abs.ll +++ test/CodeGen/Hexagon/abs.ll @@ -54,4 +54,30 @@ ret i64 %v2 } -attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" } +; CHECK-LABEL: f6: +; CHECK: r[[R60:[0-9]+]] = abs(r0) +; CHECK: r[[R61:[0-9]+]] = asr(r0,#31) +; CHECK: r0 = addasl(r[[R61]],r[[R60]],#1) +define i32 @f6(i32 %a0) #0 { + %v0 = ashr i32 %a0, 31 + %v1 = add i32 %a0, %v0 + %v2 = xor i32 %v0, %v1 + %v3 = mul i32 %v2, 2 + %v4 = add i32 %v0, %v3 + ret i32 %v4 +} + +; CHECK-LABEL: f7: +; CHECK: r[[R70:[0-9]+]] = abs(r0) +; CHECK: r[[R71:[0-9]+]] = asr(r0,#31) +; CHECK: r0 = addasl(r[[R71]],r[[R70]],#1) +define i32 @f7(i32 %a0) #0 { + %v0 = ashr i32 %a0, 31 + %v1 = add i32 %v0, %a0 + %v2 = xor i32 %v0, %v1 + %v3 = shl i32 %v2, 1 + %v4 = add i32 %v0, %v3 + ret i32 %v4 +} + +attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="-packets" }