Skip to content

Commit 82d284c

Browse files
author
Krzysztof Parzyszek
committed Jun 12, 2018
[DAGCombiner] Recognize more patterns for ABS
Differential Revision: https://reviews.llvm.org/D47831 llvm-svn: 334553
1 parent 4eed6cc commit 82d284c

File tree

5 files changed

+65
-36
lines changed

5 files changed

+65
-36
lines changed
 

‎llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+27-7
Original file line number | Diff line number | Diff line change
@@ -2555,6 +2555,20 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
25552555
if (N1.isUndef())
25562556
return N1;
25572557

2558+
// fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
2559+
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2560+
if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2561+
SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2562+
SDValue S0 = N1.getOperand(0);
2563+
if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2564+
unsigned OpSizeInBits = VT.getScalarSizeInBits();
2565+
if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2566+
if (C->getAPIntValue() == (OpSizeInBits - 1))
2567+
return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2568+
}
2569+
}
2570+
}
2571+
25582572
// If the relocation model supports it, consider symbol offsets.
25592573
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
25602574
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
@@ -5660,13 +5674,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
56605674
}
56615675

56625676
// fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
5663-
unsigned OpSizeInBits = VT.getScalarSizeInBits();
5664-
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
5665-
N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
5666-
TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
5667-
if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
5668-
if (C->getAPIntValue() == (OpSizeInBits - 1))
5669-
return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
5677+
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
5678+
SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
5679+
SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
5680+
if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
5681+
SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
5682+
SDValue S0 = S.getOperand(0);
5683+
if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
5684+
unsigned OpSizeInBits = VT.getScalarSizeInBits();
5685+
if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
5686+
if (C->getAPIntValue() == (OpSizeInBits - 1))
5687+
return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
5688+
}
5689+
}
56705690
}
56715691

56725692
// fold (xor x, x) -> 0

‎llvm/lib/Target/ARM/ARMInstrNEON.td

+8-12
Original file line number | Diff line number | Diff line change
@@ -5392,23 +5392,19 @@ defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
53925392
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
53935393
"vabdl", "u", int_arm_neon_vabdu, zext, 1>;
53945394

5395+
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
5396+
(VABDLuv8i16 DPR:$opA, DPR:$opB)>;
5397+
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
5398+
(VABDLuv4i32 DPR:$opA, DPR:$opB)>;
5399+
5400+
// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
5401+
// shift/xor pattern for ABS.
5402+
53955403
def abd_shr :
53965404
PatFrag<(ops node:$in1, node:$in2, node:$shift),
53975405
(NEONvshrs (sub (zext node:$in1),
53985406
(zext node:$in2)), (i32 $shift))>;
53995407

5400-
def : Pat<(xor (v4i32 (bitconvert (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15)))),
5401-
(v4i32 (bitconvert (v8i16 (add (sub (zext (v8i8 DPR:$opA)),
5402-
(zext (v8i8 DPR:$opB))),
5403-
(v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15))))))),
5404-
(VABDLuv8i16 DPR:$opA, DPR:$opB)>;
5405-
5406-
def : Pat<(xor (v4i32 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)),
5407-
(v4i32 (add (sub (zext (v4i16 DPR:$opA)),
5408-
(zext (v4i16 DPR:$opB))),
5409-
(abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)))),
5410-
(VABDLuv4i32 DPR:$opA, DPR:$opB)>;
5411-
54125408
def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
54135409
(v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
54145410
(zext (v2i32 DPR:$opB))),

‎llvm/lib/Target/Hexagon/HexagonISelLowering.cpp

+3
Original file line number | Diff line number | Diff line change
@@ -1327,6 +1327,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
13271327
setMinimumJumpTableEntries(std::numeric_limits<int>::max());
13281328
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
13291329

1330+
setOperationAction(ISD::ABS, MVT::i32, Legal);
1331+
setOperationAction(ISD::ABS, MVT::i64, Legal);
1332+
13301333
// Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
13311334
// but they only operate on i64.
13321335
for (MVT VT : MVT::integer_valuetypes()) {

‎llvm/lib/Target/Hexagon/HexagonPatterns.td

-16
Original file line number | Diff line number | Diff line change
@@ -1194,22 +1194,6 @@ let Predicates = [HasV5T] in {
11941194
(i32 (LoReg $Rs)))>;
11951195
}
11961196

1197-
let AddedComplexity = 50 in
1198-
multiclass Abs_pat<InstHexagon MI, PatFrag RsPred, int Sh> {
1199-
// Let y = x >> 31 (for 32-bit), i.e. the sign bit repeated.
1200-
// abs(x) = (x + y) ^ y
1201-
def: Pat<(xor (add (sra RsPred:$Rs, (i32 Sh)), RsPred:$Rs),
1202-
(sra RsPred:$Rs, (i32 Sh))),
1203-
(MI RsPred:$Rs)>;
1204-
// abs(x) = (x ^ y) - y
1205-
def: Pat<(sub (xor RsPred:$Rs, (sra RsPred:$Rs, (i32 Sh))),
1206-
(sra RsPred:$Rs, (i32 Sh))),
1207-
(MI RsPred:$Rs)>;
1208-
}
1209-
1210-
defm: Abs_pat<A2_abs, I32, 31>;
1211-
defm: Abs_pat<A2_absp, I64, 63>;
1212-
12131197
def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>;
12141198
def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>;
12151199
def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>;

‎llvm/test/CodeGen/Hexagon/abs.ll

+27-1
Original file line number | Diff line number | Diff line change
@@ -54,4 +54,30 @@ define i64 @f5(i64 %a0) #0 {
5454
ret i64 %v2
5555
}
5656

57-
attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" }
57+
; CHECK-LABEL: f6:
58+
; CHECK: r[[R60:[0-9]+]] = abs(r0)
59+
; CHECK: r[[R61:[0-9]+]] = asr(r0,#31)
60+
; CHECK: r0 = addasl(r[[R61]],r[[R60]],#1)
61+
define i32 @f6(i32 %a0) #0 {
62+
%v0 = ashr i32 %a0, 31
63+
%v1 = add i32 %a0, %v0
64+
%v2 = xor i32 %v0, %v1
65+
%v3 = mul i32 %v2, 2
66+
%v4 = add i32 %v0, %v3
67+
ret i32 %v4
68+
}
69+
70+
; CHECK-LABEL: f7:
71+
; CHECK: r[[R70:[0-9]+]] = abs(r0)
72+
; CHECK: r[[R71:[0-9]+]] = asr(r0,#31)
73+
; CHECK: r0 = addasl(r[[R71]],r[[R70]],#1)
74+
define i32 @f7(i32 %a0) #0 {
75+
%v0 = ashr i32 %a0, 31
76+
%v1 = add i32 %v0, %a0
77+
%v2 = xor i32 %v0, %v1
78+
%v3 = shl i32 %v2, 1
79+
%v4 = add i32 %v0, %v3
80+
ret i32 %v4
81+
}
82+
83+
attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="-packets" }

0 commit comments

Comments
 (0)
Please sign in to comment.