Skip to content

Commit abbb894

Browse files
author
QingShan Zhang
committedSep 7, 2018
[PowerPC] Combine ADD to ADDZE
On the ppc64le platform, if the IR has the following form,

  define i64 @addze1(i64 %x, i64 %z) local_unnamed_addr #0 {
  entry:
    %cmp = icmp ne i64 %z, CONSTANT      ; -32767 <= CONSTANT <= 32768
    %conv1 = zext i1 %cmp to i64
    %add = add nsw i64 %conv1, %x
    ret i64 %add
  }

we can optimize it to the form below.

                                 when C == 0
                             --> addze X, (addic Z, -1)
                            /
  add X, (zext(setne Z, C))--
                            \    when -32768 <= -C <= 32767 && C != 0
                             --> addze X, (addic (addi Z, -C), -1)

Patch By: HLJ2009 (Li Jia He)

Differential Revision: https://reviews.llvm.org/D51403

Reviewed By: Nemanjai

llvm-svn: 341634
1 parent 9e6845d commit abbb894

File tree

3 files changed

+270
-0
lines changed

3 files changed

+270
-0
lines changed
 

‎llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,6 +1055,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
10551055
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
10561056

10571057
// We have target-specific dag combine patterns for the following nodes:
1058+
setTargetDAGCombine(ISD::ADD);
10581059
setTargetDAGCombine(ISD::SHL);
10591060
setTargetDAGCombine(ISD::SRA);
10601061
setTargetDAGCombine(ISD::SRL);
@@ -12470,6 +12471,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
1247012471
SDLoc dl(N);
1247112472
switch (N->getOpcode()) {
1247212473
default: break;
12474+
case ISD::ADD:
12475+
return combineADD(N, DCI);
1247312476
case ISD::SHL:
1247412477
return combineSHL(N, DCI);
1247512478
case ISD::SRA:
@@ -14176,6 +14179,100 @@ SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
1417614179
return SDValue();
1417714180
}
1417814181

14182+
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
14183+
// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
14184+
// When C is zero, the equation (addi Z, -C) can be simplified to Z
14185+
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
14186+
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
14187+
const PPCSubtarget &Subtarget) {
14188+
if (!Subtarget.isPPC64())
14189+
return SDValue();
14190+
14191+
SDValue LHS = N->getOperand(0);
14192+
SDValue RHS = N->getOperand(1);
14193+
14194+
auto isZextOfCompareWithConstant = [](SDValue Op) {
14195+
if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
14196+
Op.getValueType() != MVT::i64)
14197+
return false;
14198+
14199+
SDValue Cmp = Op.getOperand(0);
14200+
if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
14201+
Cmp.getOperand(0).getValueType() != MVT::i64)
14202+
return false;
14203+
14204+
if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
14205+
int64_t NegConstant = 0 - Constant->getSExtValue();
14206+
// Due to the limitations of the addi instruction,
14207+
// -C is required to be [-32768, 32767].
14208+
return isInt<16>(NegConstant);
14209+
}
14210+
14211+
return false;
14212+
};
14213+
14214+
bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
14215+
bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
14216+
14217+
// If there is a pattern, canonicalize a zext operand to the RHS.
14218+
if (LHSHasPattern && !RHSHasPattern)
14219+
std::swap(LHS, RHS);
14220+
else if (!LHSHasPattern && !RHSHasPattern)
14221+
return SDValue();
14222+
14223+
SDLoc DL(N);
14224+
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i64);
14225+
SDValue Cmp = RHS.getOperand(0);
14226+
SDValue Z = Cmp.getOperand(0);
14227+
auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
14228+
14229+
assert(Constant && "Constant Should not be a null pointer.");
14230+
int64_t NegConstant = 0 - Constant->getSExtValue();
14231+
14232+
switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
14233+
default: break;
14234+
case ISD::SETNE: {
14235+
// when C == 0
14236+
// --> addze X, (addic Z, -1).carry
14237+
// /
14238+
// add X, (zext(setne Z, C))--
14239+
// \ when -32768 <= -C <= 32767 && C != 0
14240+
// --> addze X, (addic (addi Z, -C), -1).carry
14241+
SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
14242+
DAG.getConstant(NegConstant, DL, MVT::i64));
14243+
SDValue AddOrZ = NegConstant != 0 ? Add : Z;
14244+
SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
14245+
AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
14246+
return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
14247+
SDValue(Addc.getNode(), 1));
14248+
}
14249+
case ISD::SETEQ: {
14250+
// when C == 0
14251+
// --> addze X, (subfic Z, 0).carry
14252+
// /
14253+
// add X, (zext(sete Z, C))--
14254+
// \ when -32768 <= -C <= 32767 && C != 0
14255+
// --> addze X, (subfic (addi Z, -C), 0).carry
14256+
SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
14257+
DAG.getConstant(NegConstant, DL, MVT::i64));
14258+
SDValue AddOrZ = NegConstant != 0 ? Add : Z;
14259+
SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
14260+
DAG.getConstant(0, DL, MVT::i64), AddOrZ);
14261+
return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
14262+
SDValue(Subc.getNode(), 1));
14263+
}
14264+
}
14265+
14266+
return SDValue();
14267+
}
14268+
14269+
// Target-specific DAG combine for ISD::ADD nodes. Tries the ADD -> ADDZE fold
// and hands back its replacement; an empty SDValue means nothing was combined.
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  SDValue Folded = combineADDToADDZE(N, DCI.DAG, Subtarget);
  return Folded ? Folded : SDValue();
}
14275+
1417914276
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1418014277
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
1418114278
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())

‎llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1092,6 +1092,7 @@ namespace llvm {
10921092
SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
10931093
SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
10941094
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
1095+
SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
10951096

10961097
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
10971098
/// SETCC with integer subtraction when (1) there is a legal way of doing it

‎llvm/test/CodeGen/PowerPC/addze.ll

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
2+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr9 < %s | FileCheck %s
3+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
4+
; RUN: -ppc-asm-full-reg-names -mcpu=pwr9 < %s | FileCheck %s
5+
6+
define i64 @addze1(i64 %X, i64 %Z) {
7+
; CHECK-LABEL: addze1:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: addic [[REG1:r[0-9]+]], [[REG1]], -1
10+
; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
11+
; CHECK-NEXT: blr
12+
%cmp = icmp ne i64 %Z, 0
13+
%conv1 = zext i1 %cmp to i64
14+
%add = add nsw i64 %conv1, %X
15+
ret i64 %add
16+
}
17+
18+
define i64 @addze2(i64 %X, i64 %Z) {
19+
; CHECK-LABEL: addze2:
20+
; CHECK: # %bb.0:
21+
; CHECK-NEXT: subfic [[REG1:r[0-9]+]], [[REG1]], 0
22+
; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
23+
; CHECK-NEXT: blr
24+
%cmp = icmp eq i64 %Z, 0
25+
%conv1 = zext i1 %cmp to i64
26+
%add = add nsw i64 %conv1, %X
27+
ret i64 %add
28+
}
29+
30+
define i64 @addze3(i64 %X, i64 %Z) {
31+
; CHECK-LABEL: addze3:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], -32768
34+
; CHECK-NEXT: addic [[REG1]], [[REG1]], -1
35+
; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
36+
; CHECK-NEXT: blr
37+
%cmp = icmp ne i64 %Z, 32768
38+
%conv1 = zext i1 %cmp to i64
39+
%add = add nsw i64 %conv1, %X
40+
ret i64 %add
41+
}
42+
43+
define i64 @addze4(i64 %X, i64 %Z) {
44+
; CHECK-LABEL: addze4:
45+
; CHECK: # %bb.0:
46+
; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], -32768
47+
; CHECK-NEXT: subfic [[REG1]], [[REG1]], 0
48+
; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
49+
; CHECK-NEXT: blr
50+
%cmp = icmp eq i64 %Z, 32768
51+
%conv1 = zext i1 %cmp to i64
52+
%add = add nsw i64 %conv1, %X
53+
ret i64 %add
54+
}
55+
56+
define i64 @addze5(i64 %X, i64 %Z) {
57+
; CHECK-LABEL: addze5:
58+
; CHECK: # %bb.0:
59+
; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], 32767
60+
; CHECK-NEXT: addic [[REG1]], [[REG1]], -1
61+
; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
62+
; CHECK-NEXT: blr
63+
%cmp = icmp ne i64 %Z, -32767
64+
%conv1 = zext i1 %cmp to i64
65+
%add = add nsw i64 %conv1, %X
66+
ret i64 %add
67+
}
68+
69+
define i64 @addze6(i64 %X, i64 %Z) {
70+
; CHECK-LABEL: addze6:
71+
; CHECK: # %bb.0:
72+
; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], 32767
73+
; CHECK-NEXT: subfic [[REG1]], [[REG1]], 0
74+
; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
75+
; CHECK-NEXT: blr
76+
%cmp = icmp eq i64 %Z, -32767
77+
%conv1 = zext i1 %cmp to i64
78+
%add = add nsw i64 %conv1, %X
79+
ret i64 %add
80+
}
81+
82+
; The compare constant is out of range (-C does not fit in a signed 16-bit immediate), so the ADDZE combine must not fire.
83+
define i64 @test1(i64 %X, i64 %Z) {
84+
; CHECK-LABEL: test1:
85+
; CHECK: # %bb.0:
86+
; CHECK-NEXT: li [[REG1:r[0-9]+]], -32768
87+
; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
88+
; CHECK-NEXT: addic [[REG1]], [[REG2]], -1
89+
; CHECK-NEXT: subfe [[REG2]], [[REG1]], [[REG2]]
90+
; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
91+
; CHECK-NEXT: blr
92+
%cmp = icmp ne i64 %Z, -32768
93+
%conv1 = zext i1 %cmp to i64
94+
%add = add nsw i64 %conv1, %X
95+
ret i64 %add
96+
}
97+
98+
define i64 @test2(i64 %X, i64 %Z) {
99+
; CHECK-LABEL: test2:
100+
; CHECK: # %bb.0:
101+
; CHECK-NEXT: li [[REG1:r[0-9]+]], -32768
102+
; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
103+
; CHECK-NEXT: cntlzd [[REG2]], [[REG2]]
104+
; CHECK-NEXT: rldicl [[REG2]], [[REG2]], 58, 63
105+
; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
106+
; CHECK-NEXT: blr
107+
%cmp = icmp eq i64 %Z, -32768
108+
%conv1 = zext i1 %cmp to i64
109+
%add = add nsw i64 %conv1, %X
110+
ret i64 %add
111+
}
112+
113+
define i64 @test3(i64 %X, i64 %Z) {
114+
; CHECK-LABEL: test3:
115+
; CHECK: # %bb.0:
116+
; CHECK-NEXT: li [[REG1:r[0-9]+]], 0
117+
; CHECK-NEXT: ori [[REG1]], [[REG1]], 32769
118+
; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
119+
; CHECK-NEXT: addic [[REG1]], [[REG2]], -1
120+
; CHECK-NEXT: subfe [[REG2]], [[REG1]], [[REG2]]
121+
; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
122+
; CHECK-NEXT: blr
123+
%cmp = icmp ne i64 %Z, 32769
124+
%conv1 = zext i1 %cmp to i64
125+
%add = add nsw i64 %conv1, %X
126+
ret i64 %add
127+
}
128+
129+
define i64 @test4(i64 %X, i64 %Z) {
130+
; CHECK-LABEL: test4:
131+
; CHECK: # %bb.0:
132+
; CHECK-NEXT: li [[REG1:r[0-9]+]], 0
133+
; CHECK-NEXT: ori [[REG1]], [[REG1]], 32769
134+
; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
135+
; CHECK-NEXT: cntlzd [[REG2]], [[REG2]]
136+
; CHECK-NEXT: rldicl [[REG2]], [[REG2]], 58, 63
137+
; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
138+
; CHECK-NEXT: blr
139+
%cmp = icmp eq i64 %Z, 32769
140+
%conv1 = zext i1 %cmp to i64
141+
%add = add nsw i64 %conv1, %X
142+
ret i64 %add
143+
}
144+
145+
; The compare is between two registers (no constant operand), so the ADDZE combine must not fire.
146+
define i64 @test5(i64 %X, i64 %Y, i64 %Z) {
147+
; CHECK-LABEL: test5:
148+
; CHECK: # %bb.0:
149+
; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1:r[0-9]+]]
150+
; CHECK-NEXT: addic [[REG1]], [[REG2]], -1
151+
; CHECK-NEXT: subfe [[REG2]], [[REG1]], [[REG2]]
152+
; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
153+
; CHECK-NEXT: blr
154+
%cmp = icmp ne i64 %Y, %Z
155+
%conv1 = zext i1 %cmp to i64
156+
%add = add nsw i64 %conv1, %X
157+
ret i64 %add
158+
}
159+
160+
define i64 @test6(i64 %X, i64 %Y, i64 %Z) {
161+
; CHECK-LABEL: test6:
162+
; CHECK: # %bb.0:
163+
; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1:r[0-9]+]]
164+
; CHECK-NEXT: cntlzd [[REG2]], [[REG2]]
165+
; CHECK-NEXT: rldicl [[REG2]], [[REG2]], 58, 63
166+
; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
167+
; CHECK-NEXT: blr
168+
%cmp = icmp eq i64 %Y, %Z
169+
%conv1 = zext i1 %cmp to i64
170+
%add = add nsw i64 %conv1, %X
171+
ret i64 %add
172+
}

0 commit comments

Comments
 (0)
Please sign in to comment.