Skip to content

Commit 1445b77

Browse files
committedMar 29, 2019
[PowerPC] Strength reduction of multiply by a constant by shift and add/sub in place
A shift plus add/sub sequence can be faster than a multiply by a constant. Because the cycle count or latency of a multiply is not large, only the following patterns are considered worthwhile:
```
(mul x, 2^N + 1) => (add (shl x, N), x)
(mul x, -(2^N + 1)) => -(add (shl x, N), x)
(mul x, 2^N - 1) => (sub (shl x, N), x)
(mul x, -(2^N - 1)) => (sub x, (shl x, N))
```
The cycle counts and latencies are subtarget-dependent, so the subtarget must be consulted to decide whether to perform the transformation. The data type is also taken into account, because the cost of a multiply differs between types. Differential Revision: https://reviews.llvm.org/D58950 llvm-svn: 357233
1 parent 2a3f42c commit 1445b77

File tree

7 files changed

+640
-24
lines changed

7 files changed

+640
-24
lines changed
 

‎llvm/lib/Target/PowerPC/PPCISelLowering.cpp

+86
Original file line numberDiff line numberDiff line change
@@ -1071,6 +1071,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
10711071
setTargetDAGCombine(ISD::SHL);
10721072
setTargetDAGCombine(ISD::SRA);
10731073
setTargetDAGCombine(ISD::SRL);
1074+
setTargetDAGCombine(ISD::MUL);
10741075
setTargetDAGCombine(ISD::SINT_TO_FP);
10751076
setTargetDAGCombine(ISD::BUILD_VECTOR);
10761077
if (Subtarget.hasFPCVT())
@@ -12643,6 +12644,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
1264312644
return combineSRA(N, DCI);
1264412645
case ISD::SRL:
1264512646
return combineSRL(N, DCI);
12647+
case ISD::MUL:
12648+
return combineMUL(N, DCI);
1264612649
case PPCISD::SHL:
1264712650
if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
1264812651
return N->getOperand(0);
@@ -14565,6 +14568,89 @@ SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
1456514568
return SDValue();
1456614569
}
1456714570

14571+
// Try to replace a multiply by a constant with a shift followed by an add or
// a sub. The patterns handled are:
//   (mul x,   2^N + 1)  => (add (shl x, N), x)
//   (mul x, -(2^N + 1)) => (sub 0, (add (shl x, N), x))
//   (mul x,   2^N - 1)  => (sub (shl x, N), x)
//   (mul x, -(2^N - 1)) => (sub x, (shl x, N))
// Returns an empty SDValue whenever the node is left untouched.
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // The multiplier must be recognised by isConstOrConstSplat; otherwise there
  // is nothing to do here.
  ConstantSDNode *MulConst = isConstOrConstSplat(N->getOperand(1));
  if (!MulConst)
    return SDValue();

  EVT VT = N->getValueType(0);

  // An imul is usually smaller than the alternative sequence for a legal
  // type, so keep it when the function is optimized for minimum size.
  const bool MinSize = DAG.getMachineFunction().getFunction().optForMinSize();
  if (MinSize && isOperationLegal(ISD::MUL, VT))
    return SDValue();

  // Subtarget cost model. The cycle ratios of the operations involved are:
  //
  //   PWR8    type    mul  add  shl
  //           scalar   4    1    1
  //           vector   7    2    2
  //
  //   PWR9    type    mul  add  shl
  //           scalar   5    2    2
  //           vector   7    2    2
  //
  // On PWR8 every pattern is profitable. On PWR9 the two-instruction
  // patterns (add/sub + shl) cost 4 and are always profitable, but the
  // three-instruction pattern (sub + add + shl) costs 6, so it is only done
  // for vector types.
  // TODO: enhance the condition for subtargets before pwr8.
  auto WorthExpanding = [this, VT](bool ThreeInstrs) -> bool {
    const auto Directive = this->Subtarget.getDarwinDirective();
    if (Directive == PPC::DIR_PWR8)
      return true;
    if (Directive == PPC::DIR_PWR9)
      return !ThreeInstrs || VT.isVector();
    return false;
  };

  SDLoc DL(N);

  const APInt &MulAmt = MulConst->getAPIntValue();
  const bool Negative = MulAmt.isNegative();
  const APInt AbsAmt = MulAmt.abs();
  const APInt AbsMinusOne = AbsAmt - 1;
  const APInt AbsPlusOne = AbsAmt + 1;

  // |C| == 2^N + 1 takes priority; |C| == 2^N - 1 is only considered when the
  // first form does not match.
  const bool IsPow2PlusOne = AbsMinusOne.isPowerOf2();
  const bool IsPow2MinusOne = !IsPow2PlusOne && AbsPlusOne.isPowerOf2();
  if (!IsPow2PlusOne && !IsPow2MinusOne)
    return SDValue();

  // Only the negated 2^N + 1 form needs three instructions.
  if (!WorthExpanding(IsPow2PlusOne && Negative))
    return SDValue();

  // Both forms start from (shl x, N).
  SDValue X = N->getOperand(0);
  const unsigned ShiftBits =
      (IsPow2PlusOne ? AbsMinusOne : AbsPlusOne).logBase2();
  SDValue ShiftAmt = DAG.getConstant(ShiftBits, DL, VT);
  SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, X, ShiftAmt);

  if (IsPow2PlusOne) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, Shifted);
    if (!Negative)
      return Sum;

    // (mul x, -(2^N + 1)) => (sub 0, (add (shl x, N), x))
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, Zero, Sum);
  }

  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
  if (Negative)
    return DAG.getNode(ISD::SUB, DL, VT, X, Shifted);

  // (mul x, 2^N - 1) => (sub (shl x, N), x)
  return DAG.getNode(ISD::SUB, DL, VT, Shifted, X);
}
14653+
1456814654
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1456914655
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
1457014656
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())

‎llvm/lib/Target/PowerPC/PPCISelLowering.h

+1
Original file line numberDiff line numberDiff line change
@@ -1121,6 +1121,7 @@ namespace llvm {
11211121
SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
11221122
SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
11231123
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
1124+
SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
11241125
SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
11251126
SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
11261127
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mcpu=generic < %s -mtriple=ppc64-- | FileCheck %s -check-prefix=GENERIC-CHECK
2+
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mcpu=pwr8 < %s -mtriple=ppc64-- | FileCheck %s -check-prefixes=PWR8-CHECK,CHECK
3+
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mcpu=pwr9 < %s -mtriple=ppc64le-- | FileCheck %s -check-prefixes=PWR9-CHECK,CHECK
4+
5+
6+
define i64 @foo(i64 %a) {
7+
entry:
8+
%mul = mul nsw i64 %a, 6
9+
ret i64 %mul
10+
}
11+
12+
; GENERIC-CHECK-LABEL: @foo
13+
; GENERIC-CHECK: mulli r3, r3, 6
14+
; GENERIC-CHECK: blr
15+
16+
define i64 @test1(i64 %a) {
17+
%tmp.1 = mul nsw i64 %a, 16 ; <i64> [#uses=1]
18+
ret i64 %tmp.1
19+
}
20+
; CHECK-LABEL: test1:
21+
; CHECK-NOT: mul
22+
; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
23+
24+
25+
define i64 @test2(i64 %a) {
26+
%tmp.1 = mul nsw i64 %a, 17 ; <i64> [#uses=1]
27+
ret i64 %tmp.1
28+
}
29+
; CHECK-LABEL: test2:
30+
; CHECK-NOT: mul
31+
; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
32+
; CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]]
33+
34+
define i64 @test3(i64 %a) {
35+
%tmp.1 = mul nsw i64 %a, 15 ; <i64> [#uses=1]
36+
ret i64 %tmp.1
37+
}
38+
; CHECK-LABEL: test3:
39+
; CHECK-NOT: mul
40+
; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
41+
; CHECK-NEXT: sub r[[REG2:[0-9]+]], r[[REG1]], r3
42+
43+
; negative constant
44+
45+
define i64 @test4(i64 %a) {
46+
%tmp.1 = mul nsw i64 %a, -16 ; <i64> [#uses=1]
47+
ret i64 %tmp.1
48+
}
49+
; CHECK-LABEL: test4:
50+
; CHECK-NOT: mul
51+
; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
52+
; CHECK-NEXT: neg r[[REG2:[0-9]+]], r[[REG1]]
53+
54+
define i64 @test5(i64 %a) {
55+
%tmp.1 = mul nsw i64 %a, -17 ; <i64> [#uses=1]
56+
ret i64 %tmp.1
57+
}
58+
; CHECK-LABEL: test5:
59+
; PWR9-CHECK: mulli r[[REG1:[0-9]+]], r3, -17
60+
; PWR8-CHECK-NOT: mul
61+
; PWR8-CHECK: sldi r[[REG1:[0-9]+]], r3, 4
62+
; PWR8-CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]]
63+
; PWR8-CHECK-NEXT: neg r{{[0-9]+}}, r[[REG2]]
64+
65+
define i64 @test6(i64 %a) {
66+
%tmp.1 = mul nsw i64 %a, -15 ; <i64> [#uses=1]
67+
ret i64 %tmp.1
68+
}
69+
; CHECK-LABEL: test6:
70+
; CHECK-NOT: mul
71+
; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
72+
; CHECK-NEXT: sub r[[REG2:[0-9]+]], r3, r[[REG1]]
73+
; CHECK-NOT: neg
74+
75+
; boundary case
76+
77+
define i64 @test7(i64 %a) {
78+
%tmp.1 = mul nsw i64 %a, -9223372036854775808 ; <i64> [#uses=1]
79+
ret i64 %tmp.1
80+
}
81+
; CHECK-LABEL: test7:
82+
; CHECK-NOT: mul
83+
; CHECK: sldi r[[REG1:[0-9]+]], r3, 63
84+
85+
define i64 @test8(i64 %a) {
86+
%tmp.1 = mul nsw i64 %a, 9223372036854775807 ; <i64> [#uses=1]
87+
ret i64 %tmp.1
88+
}
89+
; CHECK-LABEL: test8:
90+
; CHECK-NOT: mul
91+
; CHECK: sldi r[[REG1:[0-9]+]], r3, 63
92+
; CHECK-NEXT: sub r[[REG2:[0-9]+]], r[[REG1]], r3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,382 @@
1+
; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -mtriple=ppc64le-- -mcpu=pwr8 | FileCheck %s --check-prefixes=CHECK,CHECK-P8
2+
; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -mtriple=ppc64le-- -mcpu=pwr9 | FileCheck %s --check-prefixes=CHECK,CHECK-P9
3+
4+
define <16 x i8> @test1_v16i8(<16 x i8> %a) {
5+
%tmp.1 = mul nsw <16 x i8> %a, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16> ; <<16 x i8>> [#uses=1]
6+
ret <16 x i8> %tmp.1
7+
}
8+
; CHECK-LABEL: test1_v16i8:
9+
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
10+
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
11+
; CHECK-NOT: vmul
12+
; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
13+
14+
define <16 x i8> @test2_v16i8(<16 x i8> %a) {
15+
%tmp.1 = mul nsw <16 x i8> %a, <i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17> ; <<16 x i8>> [#uses=1]
16+
ret <16 x i8> %tmp.1
17+
}
18+
; CHECK-LABEL: test2_v16i8:
19+
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
20+
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
21+
; CHECK-NOT: vmul
22+
; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
23+
; CHECK-NEXT: vaddubm v[[REG3:[0-9]+]], v2, v[[REG2]]
24+
25+
define <16 x i8> @test3_v16i8(<16 x i8> %a) {
26+
%tmp.1 = mul nsw <16 x i8> %a, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15> ; <<16 x i8>> [#uses=1]
27+
ret <16 x i8> %tmp.1
28+
}
29+
; CHECK-LABEL: test3_v16i8:
30+
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
31+
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
32+
; CHECK-NOT: vmul
33+
; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
34+
; CHECK-NEXT: vsububm v[[REG3:[0-9]+]], v[[REG2]], v2
35+
36+
; negative constant
37+
38+
define <16 x i8> @test4_v16i8(<16 x i8> %a) {
39+
%tmp.1 = mul nsw <16 x i8> %a, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16> ; <<16 x i8>> [#uses=1]
40+
ret <16 x i8> %tmp.1
41+
}
42+
; CHECK-LABEL: test4_v16i8:
43+
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
44+
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
45+
; CHECK-NOT: vmul
46+
; CHECK-NEXT: vslb v[[REG3:[0-9]+]], v2, v[[REG1]]
47+
; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]],
48+
; CHECK-NEXT: vsububm v[[REG4:[0-9]+]], v[[REG2]], v[[REG3]]
49+
50+
define <16 x i8> @test5_v16i8(<16 x i8> %a) {
51+
%tmp.1 = mul nsw <16 x i8> %a, <i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17> ; <<16 x i8>> [#uses=1]
52+
ret <16 x i8> %tmp.1
53+
}
54+
; CHECK-LABEL: test5_v16i8:
55+
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
56+
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
57+
; CHECK-NOT: vmul
58+
; CHECK-NEXT: vslb v[[REG3:[0-9]+]], v2, v[[REG1]]
59+
; CHECK-NEXT: vaddubm v[[REG4:[0-9]+]], v2, v[[REG3]]
60+
; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]],
61+
; CHECK-NEXT: vsububm v[[REG5:[0-9]+]], v[[REG2]], v[[REG4]]
62+
63+
define <16 x i8> @test6_v16i8(<16 x i8> %a) {
64+
%tmp.1 = mul nsw <16 x i8> %a, <i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15> ; <<16 x i8>> [#uses=1]
65+
ret <16 x i8> %tmp.1
66+
}
67+
; CHECK-LABEL: test6_v16i8:
68+
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
69+
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
70+
; CHECK-NOT: vmul
71+
; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
72+
; CHECK-NEXT: vsububm v[[REG3:[0-9]+]], v2, v[[REG2]]
73+
74+
; boundary case
75+
76+
define <16 x i8> @test7_v16i8(<16 x i8> %a) {
77+
%tmp.1 = mul nsw <16 x i8> %a, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128> ; <<16 x i8>> [#uses=1]
78+
ret <16 x i8> %tmp.1
79+
}
80+
; CHECK-LABEL: test7_v16i8:
81+
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 7
82+
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 7
83+
; CHECK-NOT: vmul
84+
; CHECK-NEXT: vslb v[[REG5:[0-9]+]], v2, v[[REG1]]
85+
86+
define <16 x i8> @test8_v16i8(<16 x i8> %a) {
87+
%tmp.1 = mul nsw <16 x i8> %a, <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127> ; <<16 x i8>> [#uses=1]
88+
ret <16 x i8> %tmp.1
89+
}
90+
; CHECK-LABEL: test8_v16i8:
91+
; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 7
92+
; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 7
93+
; CHECK-NOT: vmul
94+
; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
95+
; CHECK-NEXT: vsububm v[[REG3:[0-9]+]], v[[REG2]], v2
96+
97+
define <8 x i16> @test1_v8i16(<8 x i16> %a) {
98+
%tmp.1 = mul nsw <8 x i16> %a, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> ; <<8 x i16>> [#uses=1]
99+
ret <8 x i16> %tmp.1
100+
}
101+
; CHECK-LABEL: test1_v8i16:
102+
; CHECK: vspltish v[[REG1:[0-9]+]], 4
103+
; CHECK-NOT: vmul
104+
; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
105+
106+
define <8 x i16> @test2_v8i16(<8 x i16> %a) {
107+
%tmp.1 = mul nsw <8 x i16> %a, <i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17> ; <<8 x i16>> [#uses=1]
108+
ret <8 x i16> %tmp.1
109+
}
110+
; CHECK-LABEL: test2_v8i16:
111+
; CHECK: vspltish v[[REG1:[0-9]+]], 4
112+
; CHECK-NOT: vmul
113+
; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
114+
; CHECK-NEXT: vadduhm v[[REG3:[0-9]+]], v2, v[[REG2]]
115+
116+
define <8 x i16> @test3_v8i16(<8 x i16> %a) {
117+
%tmp.1 = mul nsw <8 x i16> %a, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; <<8 x i16>> [#uses=1]
118+
ret <8 x i16> %tmp.1
119+
}
120+
; CHECK-LABEL: test3_v8i16:
121+
; CHECK: vspltish v[[REG1:[0-9]+]], 4
122+
; CHECK-NOT: vmul
123+
; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
124+
; CHECK-NEXT: vsubuhm v[[REG3:[0-9]+]], v[[REG2]], v2
125+
126+
; negative constant
127+
128+
define <8 x i16> @test4_v8i16(<8 x i16> %a) {
129+
%tmp.1 = mul nsw <8 x i16> %a, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16> ; <<8 x i16>> [#uses=1]
130+
ret <8 x i16> %tmp.1
131+
}
132+
; CHECK-LABEL: test4_v8i16:
133+
; CHECK: vspltish v[[REG1:[0-9]+]], 4
134+
; CHECK-NOT: vmul
135+
; CHECK-NEXT: vslh v[[REG3:[0-9]+]], v2, v[[REG1]]
136+
; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]],
137+
; CHECK-NEXT: vsubuhm v[[REG4:[0-9]+]], v[[REG2]], v[[REG3]]
138+
139+
define <8 x i16> @test5_v8i16(<8 x i16> %a) {
140+
%tmp.1 = mul nsw <8 x i16> %a, <i16 -17, i16 -17, i16 -17, i16 -17, i16 -17, i16 -17, i16 -17, i16 -17> ; <<8 x i16>> [#uses=1]
141+
ret <8 x i16> %tmp.1
142+
}
143+
; CHECK-LABEL: test5_v8i16:
144+
; CHECK: vspltish v[[REG1:[0-9]+]], 4
145+
; CHECK-NOT: vmul
146+
; CHECK-NEXT: vslh v[[REG3:[0-9]+]], v2, v[[REG1]]
147+
; CHECK-NEXT: vadduhm v[[REG4:[0-9]+]], v2, v[[REG3]]
148+
; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]],
149+
; CHECK-NEXT: vsubuhm v[[REG5:[0-9]+]], v[[REG2]], v[[REG4]]
150+
151+
define <8 x i16> @test6_v8i16(<8 x i16> %a) {
152+
%tmp.1 = mul nsw <8 x i16> %a, <i16 -15, i16 -15, i16 -15, i16 -15, i16 -15, i16 -15, i16 -15, i16 -15> ; <<8 x i16>> [#uses=1]
153+
ret <8 x i16> %tmp.1
154+
}
155+
; CHECK-LABEL: test6_v8i16:
156+
; CHECK: vspltish v[[REG1:[0-9]+]], 4
157+
; CHECK-NOT: vmul
158+
; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
159+
; CHECK-NEXT: vsubuhm v[[REG3:[0-9]+]], v2, v[[REG2]]
160+
161+
; boundary case
162+
163+
define <8 x i16> @test7_v8i16(<8 x i16> %a) {
164+
%tmp.1 = mul nsw <8 x i16> %a, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768> ; <<8 x i16>> [#uses=1]
165+
ret <8 x i16> %tmp.1
166+
}
167+
; CHECK-LABEL: test7_v8i16:
168+
; CHECK: vspltish v[[REG1:[0-9]+]], 15
169+
; CHECK-NOT: vmul
170+
; CHECK-NEXT: vslh v[[REG5:[0-9]+]], v2, v[[REG1]]
171+
172+
define <8 x i16> @test8_v8i16(<8 x i16> %a) {
173+
%tmp.1 = mul nsw <8 x i16> %a, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> ; <<8 x i16>> [#uses=1]
174+
ret <8 x i16> %tmp.1
175+
}
176+
; CHECK-LABEL: test8_v8i16:
177+
; CHECK: vspltish v[[REG1:[0-9]+]], 15
178+
; CHECK-NOT: vmul
179+
; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
180+
; CHECK-NEXT: vsubuhm v[[REG3:[0-9]+]], v[[REG2]], v2
181+
182+
define <4 x i32> @test1_v4i32(<4 x i32> %a) {
183+
%tmp.1 = mul nsw <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16> ; <<4 x i32>> [#uses=1]
184+
ret <4 x i32> %tmp.1
185+
}
186+
; CHECK-LABEL: test1_v4i32:
187+
; CHECK: vspltisw v[[REG1:[0-9]+]], 4
188+
; CHECK-NOT: vmul
189+
; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
190+
191+
define <4 x i32> @test2_v4i32(<4 x i32> %a) {
192+
%tmp.1 = mul nsw <4 x i32> %a, <i32 17, i32 17, i32 17, i32 17> ; <<4 x i32>> [#uses=1]
193+
ret <4 x i32> %tmp.1
194+
}
195+
; CHECK-LABEL: test2_v4i32:
196+
; CHECK: vspltisw v[[REG1:[0-9]+]], 4
197+
; CHECK-NOT: vmul
198+
; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
199+
; CHECK-NEXT: vadduwm v[[REG3:[0-9]+]], v2, v[[REG2]]
200+
201+
define <4 x i32> @test3_v4i32(<4 x i32> %a) {
202+
%tmp.1 = mul nsw <4 x i32> %a, <i32 15, i32 15, i32 15, i32 15> ; <<4 x i32>> [#uses=1]
203+
ret <4 x i32> %tmp.1
204+
}
205+
; CHECK-LABEL: test3_v4i32:
206+
; CHECK: vspltisw v[[REG1:[0-9]+]], 4
207+
; CHECK-NOT: vmul
208+
; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
209+
; CHECK-NEXT: vsubuwm v[[REG3:[0-9]+]], v[[REG2]], v2
210+
211+
; negative constant
212+
213+
define <4 x i32> @test4_v4i32(<4 x i32> %a) {
214+
%tmp.1 = mul nsw <4 x i32> %a, <i32 -16, i32 -16, i32 -16, i32 -16> ; <<4 x i32>> [#uses=1]
215+
ret <4 x i32> %tmp.1
216+
}
217+
; CHECK-LABEL: test4_v4i32:
218+
; CHECK: vspltisw v[[REG1:[0-9]+]], 4
219+
; CHECK-NOT: vmul
220+
; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
221+
; CHECK-P8-NEXT: xxlxor v[[REG3:[0-9]+]],
222+
; CHECK-P8-NEXT: vsubuwm v{{[0-9]+}}, v[[REG3]], v[[REG2]]
223+
; CHECK-P9-NEXT: vnegw v{{[0-9]+}}, v[[REG2]]
224+
225+
define <4 x i32> @test5_v4i32(<4 x i32> %a) {
226+
%tmp.1 = mul nsw <4 x i32> %a, <i32 -17, i32 -17, i32 -17, i32 -17> ; <<4 x i32>> [#uses=1]
227+
ret <4 x i32> %tmp.1
228+
}
229+
; CHECK-LABEL: test5_v4i32:
230+
; CHECK: vspltisw v[[REG1:[0-9]+]], 4
231+
; CHECK-NOT: vmul
232+
; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
233+
; CHECK-NEXT: vadduwm v[[REG3:[0-9]+]], v2, v[[REG2]]
234+
; CHECK-P8-NEXT: xxlxor v[[REG4:[0-9]+]],
235+
; CHECK-P8-NEXT: vsubuwm v{{[0-9]+}}, v[[REG4]], v[[REG3]]
236+
; CHECK-P9-NEXT: vnegw v{{[0-9]+}}, v[[REG3]]
237+
238+
define <4 x i32> @test6_v4i32(<4 x i32> %a) {
239+
%tmp.1 = mul nsw <4 x i32> %a, <i32 -15, i32 -15, i32 -15, i32 -15> ; <<4 x i32>> [#uses=1]
240+
ret <4 x i32> %tmp.1
241+
}
242+
; CHECK-LABEL: test6_v4i32:
243+
; CHECK: vspltisw v[[REG1:[0-9]+]], 4
244+
; CHECK-NOT: vmul
245+
; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
246+
; CHECK-NEXT: vsubuwm v[[REG3:[0-9]+]], v2, v[[REG2]]
247+
248+
; boundary case
249+
250+
define <4 x i32> @test7_v4i32(<4 x i32> %a) {
251+
%tmp.1 = mul nsw <4 x i32> %a, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> ; <<4 x i32>> [#uses=1]
252+
ret <4 x i32> %tmp.1
253+
}
254+
; CHECK-LABEL: test7_v4i32:
255+
; CHECK-DAG: vspltisw v[[REG2:[0-9]+]], -16
256+
; CHECK-DAG: vspltisw v[[REG3:[0-9]+]], 15
257+
; CHECK-NEXT: vsubuwm v[[REG4:[0-9]+]], v[[REG3]], v[[REG2]]
258+
; CHECK-NOT: vmul
259+
; CHECK-NEXT: vslw v[[REG5:[0-9]+]], v2, v[[REG4]]
260+
261+
define <4 x i32> @test8_v4i32(<4 x i32> %a) {
262+
%tmp.1 = mul nsw <4 x i32> %a, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> ; <<4 x i32>> [#uses=1]
263+
ret <4 x i32> %tmp.1
264+
}
265+
; CHECK-LABEL: test8_v4i32:
266+
; CHECK-DAG: vspltisw v[[REG2:[0-9]+]], -16
267+
; CHECK-DAG: vspltisw v[[REG3:[0-9]+]], 15
268+
; CHECK-NEXT: vsubuwm v[[REG4:[0-9]+]], v[[REG3]], v[[REG2]]
269+
; CHECK-NOT: vmul
270+
; CHECK-NEXT: vslw v[[REG5:[0-9]+]], v2, v[[REG4]]
271+
; CHECK-NEXT: vsubuwm v[[REG6:[0-9]+]], v[[REG5]], v2
272+
273+
define <2 x i64> @test1_v2i64(<2 x i64> %a) {
274+
%tmp.1 = mul nsw <2 x i64> %a, <i64 16, i64 16> ; <<2 x i64>> [#uses=1]
275+
ret <2 x i64> %tmp.1
276+
}
277+
; CHECK-LABEL: test1_v2i64:
278+
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
279+
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
280+
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
281+
; CHECK-NOT: vmul
282+
; CHECK-NEXT: vsld v{{[0-9]+}}, v2, v[[REG2]]
283+
284+
define <2 x i64> @test2_v2i64(<2 x i64> %a) {
285+
%tmp.1 = mul nsw <2 x i64> %a, <i64 17, i64 17> ; <<2 x i64>> [#uses=1]
286+
ret <2 x i64> %tmp.1
287+
}
288+
289+
; CHECK-LABEL: test2_v2i64:
290+
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
291+
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
292+
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
293+
; CHECK-NOT: vmul
294+
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
295+
; CHECK-NEXT: vaddudm v{{[0-9]+}}, v2, v[[REG3]]
296+
297+
define <2 x i64> @test3_v2i64(<2 x i64> %a) {
298+
%tmp.1 = mul nsw <2 x i64> %a, <i64 15, i64 15> ; <<2 x i64>> [#uses=1]
299+
ret <2 x i64> %tmp.1
300+
}
301+
302+
; CHECK-LABEL: test3_v2i64:
303+
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
304+
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
305+
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
306+
; CHECK-NOT: vmul
307+
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
308+
; CHECK-NEXT: vsubudm v{{[0-9]+}}, v[[REG3]], v2
309+
310+
; negative constant
311+
312+
define <2 x i64> @test4_v2i64(<2 x i64> %a) {
313+
%tmp.1 = mul nsw <2 x i64> %a, <i64 -16, i64 -16> ; <<2 x i64>> [#uses=1]
314+
ret <2 x i64> %tmp.1
315+
}
316+
317+
; CHECK-LABEL: test4_v2i64:
318+
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
319+
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
320+
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
321+
; CHECK-NOT: vmul
322+
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
323+
; CHECK-P8-NEXT: xxlxor v[[REG4:[0-9]+]],
324+
; CHECK-P8-NEXT: vsubudm v{{[0-9]+}}, v[[REG4]], v[[REG3]]
325+
; CHECK-P9-NEXT: vnegd v[[REG4:[0-9]+]], v[[REG3]]
326+
327+
define <2 x i64> @test5_v2i64(<2 x i64> %a) {
328+
%tmp.1 = mul nsw <2 x i64> %a, <i64 -17, i64 -17> ; <<2 x i64>> [#uses=1]
329+
ret <2 x i64> %tmp.1
330+
}
331+
332+
; CHECK-LABEL: test5_v2i64:
333+
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
334+
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
335+
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
336+
; CHECK-NOT: vmul
337+
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
338+
; CHECK-NEXT: vaddudm v[[REG4:[0-9]+]], v2, v[[REG3]]
339+
; CHECK-P8-NEXT: xxlxor v[[REG5:[0-9]+]],
340+
; CHECK-P8-NEXT: vsubudm v[[REG6:[0-9]+]], v[[REG5]], v[[REG4]]
341+
; CHECK-P9-NEXT: vnegd v{{[0-9]+}}, v[[REG4]]
342+
343+
define <2 x i64> @test6_v2i64(<2 x i64> %a) {
344+
%tmp.1 = mul nsw <2 x i64> %a, <i64 -15, i64 -15> ; <<2 x i64>> [#uses=1]
345+
ret <2 x i64> %tmp.1
346+
}
347+
348+
; CHECK-LABEL: test6_v2i64:
349+
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
350+
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
351+
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
352+
; CHECK-NOT: vmul
353+
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
354+
; CHECK-NEXT: vsubudm v{{[0-9]+}}, v2, v[[REG3]]
355+
356+
357+
; boundary case
358+
359+
define <2 x i64> @test7_v2i64(<2 x i64> %a) {
360+
%tmp.1 = mul nsw <2 x i64> %a, <i64 -9223372036854775808, i64 -9223372036854775808> ; <<2 x i64>> [#uses=1]
361+
ret <2 x i64> %tmp.1
362+
}
363+
364+
; CHECK-LABEL: test7_v2i64:
365+
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
366+
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
367+
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
368+
; CHECK-NOT: vmul
369+
; CHECK-NEXT: vsld v[[REG4:[0-9]+]], v2, v[[REG2]]
370+
371+
define <2 x i64> @test8_v2i64(<2 x i64> %a) {
372+
%tmp.1 = mul nsw <2 x i64> %a, <i64 9223372036854775807, i64 9223372036854775807> ; <<2 x i64>> [#uses=1]
373+
ret <2 x i64> %tmp.1
374+
}
375+
376+
; CHECK-LABEL: test8_v2i64:
377+
; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
378+
; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
379+
; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
380+
; CHECK-NOT: vmul
381+
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
382+
; CHECK-NEXT: vsubudm v{{[0-9]+}}, v[[REG3]], v2
+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=ppc64-- | FileCheck %s -check-prefixes=PWR8-CHECK,CHECK
2+
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=ppc64le-- | FileCheck %s -check-prefixes=PWR9-CHECK,CHECK
3+
4+
define i32 @test1(i32 %a) {
5+
%tmp.1 = mul nsw i32 %a, 16 ; <i32> [#uses=1]
6+
ret i32 %tmp.1
7+
}
8+
; CHECK-LABEL: test1:
9+
; CHECK-NOT: mul
10+
; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
11+
12+
define i32 @test2(i32 %a) {
13+
%tmp.1 = mul nsw i32 %a, 17 ; <i32> [#uses=1]
14+
ret i32 %tmp.1
15+
}
16+
; CHECK-LABEL: test2:
17+
; CHECK-NOT: mul
18+
; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
19+
; CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]]
20+
21+
define i32 @test3(i32 %a) {
22+
%tmp.1 = mul nsw i32 %a, 15 ; <i32> [#uses=1]
23+
ret i32 %tmp.1
24+
}
25+
; CHECK-LABEL: test3:
26+
; CHECK-NOT: mul
27+
; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
28+
; CHECK-NEXT: subf r[[REG2:[0-9]+]], r3, r[[REG1]]
29+
30+
; negative constant
31+
32+
define i32 @test4(i32 %a) {
33+
%tmp.1 = mul nsw i32 %a, -16 ; <i32> [#uses=1]
34+
ret i32 %tmp.1
35+
}
36+
; CHECK-LABEL: test4:
37+
; CHECK-NOT: mul
38+
; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
39+
; CHECK-NEXT: neg r[[REG2:[0-9]+]], r[[REG1]]
40+
41+
define i32 @test5(i32 %a) {
42+
%tmp.1 = mul nsw i32 %a, -17 ; <i32> [#uses=1]
43+
ret i32 %tmp.1
44+
}
45+
; CHECK-LABEL: test5:
46+
; PWR9-CHECK: mulli r[[REG1:[0-9]+]], r3, -17
47+
; PWR8-CHECK-NOT: mul
48+
; PWR8-CHECK: slwi r[[REG1:[0-9]+]], r3, 4
49+
; PWR8-CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]]
50+
; PWR8-CHECK-NEXT: neg r{{[0-9]+}}, r[[REG2]]
51+
52+
define i32 @test6(i32 %a) {
53+
%tmp.1 = mul nsw i32 %a, -15 ; <i32> [#uses=1]
54+
ret i32 %tmp.1
55+
}
56+
; CHECK-LABEL: test6:
57+
; CHECK-NOT: mul
58+
; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
59+
; CHECK-NEXT: subf r[[REG2:[0-9]+]], r[[REG1]], r3
60+
; CHECK-NOT: neg
61+
62+
; boundary case
63+
64+
define i32 @test7(i32 %a) {
65+
%tmp.1 = mul nsw i32 %a, -2147483648 ; <i32> [#uses=1]
66+
ret i32 %tmp.1
67+
}
68+
; CHECK-LABEL: test7:
69+
; CHECK-NOT: mul
70+
; CHECK: slwi r[[REG1:[0-9]+]], r3, 31
71+
72+
define i32 @test8(i32 %a) {
73+
%tmp.1 = mul nsw i32 %a, 2147483647 ; <i32> [#uses=1]
74+
ret i32 %tmp.1
75+
}
76+
; CHECK-LABEL: test8:
77+
; CHECK-NOT: mul
78+
; CHECK: slwi r[[REG1:[0-9]+]], r3, 31
79+
; CHECK-NEXT: subf r[[REG2:[0-9]+]], r3, r[[REG1]]

‎llvm/test/CodeGen/PowerPC/mul-neg-power-2.ll

-8
This file was deleted.

‎llvm/test/CodeGen/PowerPC/mulli64.ll

-16
This file was deleted.

0 commit comments

Comments
 (0)
Please sign in to comment.