Skip to content

Commit 9bf092d

Browse files
committed Apr 9, 2019
[AArch64][GlobalISel] Add isel support for vector G_ICMP and G_ASHR & G_SHL
The selection for G_ICMP is unfortunately not currently importable from SDAG due to the use of custom SDNodes. To support this, this selection method has an opcode table which has been generated by a script, indexed by various instruction properties. Ideally in future we will have GISel-native selection patterns that we can write in tablegen to improve on this. For selection of some types we also need support for G_ASHR and G_SHL, which are generated as a result of legalization. This patch also adds support for them, generating the same code as SelectionDAG currently does. Differential Revision: https://reviews.llvm.org/D60436 llvm-svn: 358035
1 parent 888dd5d commit 9bf092d

File tree

3 files changed

+3729
-2
lines changed

3 files changed

+3729
-2
lines changed
 

‎llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Lines changed: 259 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ class AArch64InstructionSelector : public InstructionSelector {
6767
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
6868
MachineRegisterInfo &MRI) const;
6969

70+
bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
71+
bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
72+
7073
// Helper to generate an equivalent of scalar_to_vector into a new register,
7174
// returned via 'Dst'.
7275
MachineInstr *emitScalarToVector(unsigned EltSize,
@@ -98,6 +101,7 @@ class AArch64InstructionSelector : public InstructionSelector {
98101
MachineRegisterInfo &MRI) const;
99102
bool selectIntrinsicWithSideEffects(MachineInstr &I,
100103
MachineRegisterInfo &MRI) const;
104+
bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
101105

102106
unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
103107
MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
@@ -824,6 +828,77 @@ bool AArch64InstructionSelector::selectCompareBranch(
824828
return true;
825829
}
826830

831+
/// Select an AArch64 vector shift-left for a G_SHL whose result type is a
/// vector. Emits a USHL register-shift instruction.
///
/// Only <2 x s32> and <4 x s32> are currently handled; any other vector type
/// is rejected (returns false) so the caller can report selection failure.
/// Scalar G_SHL must not reach here; the caller falls through to the generic
/// binop path for scalars.
bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  unsigned Src1Reg = I.getOperand(1).getReg();
  unsigned Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // Pick the USHL variant matching the vector type. (Fix: the original also
  // computed a register class here but never used it; constraining below is
  // done via constrainSelectedInstRegOperands instead.)
  unsigned Opc = 0;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type\n");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
861+
862+
/// Select an AArch64 vector arithmetic shift-right for a G_ASHR whose result
/// type is a vector.
///
/// AArch64 has no right-shift-by-register vector instruction; instead, SSHL
/// takes a signed shift amount where negative values shift right. So we emit
/// NEG on the shift-amount vector followed by SSHL, matching what
/// SelectionDAG produces.
///
/// Only <2 x s32> and <4 x s32> are currently handled; any other vector type
/// is rejected (returns false) so the caller can report selection failure.
bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  unsigned DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  unsigned Src1Reg = I.getOperand(1).getReg();
  unsigned Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is not a shift right register instruction, but the shift left
  // register instruction takes a signed value, where negative numbers specify a
  // right shift.

  unsigned Opc = 0;
  unsigned NegOpc = 0;
  // Register class for the intermediate NEG result (64 vs 128-bit FPR).
  const TargetRegisterClass *RC = nullptr;
  if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::SSHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::SSHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    RC = &AArch64::FPR64RegClass;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type\n");
    return false;
  }

  MachineIRBuilder MIB(I);
  // NEG the shift amount, then shift left by the (negative) amount.
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
901+
827902
bool AArch64InstructionSelector::selectVaStartAAPCS(
828903
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
829904
return false;
@@ -1318,10 +1393,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
13181393
case TargetOpcode::G_FMUL:
13191394
case TargetOpcode::G_FDIV:
13201395

1321-
case TargetOpcode::G_OR:
1396+
case TargetOpcode::G_ASHR:
1397+
if (MRI.getType(I.getOperand(0).getReg()).isVector())
1398+
return selectVectorASHR(I, MRI);
1399+
LLVM_FALLTHROUGH;
13221400
case TargetOpcode::G_SHL:
1401+
if (Opcode == TargetOpcode::G_SHL &&
1402+
MRI.getType(I.getOperand(0).getReg()).isVector())
1403+
return selectVectorSHL(I, MRI);
1404+
LLVM_FALLTHROUGH;
1405+
case TargetOpcode::G_OR:
13231406
case TargetOpcode::G_LSHR:
1324-
case TargetOpcode::G_ASHR:
13251407
case TargetOpcode::G_GEP: {
13261408
// Reject the various things we don't support yet.
13271409
if (unsupportedBinOp(I, RBI, MRI, TRI))
@@ -1625,6 +1707,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
16251707
return true;
16261708
}
16271709
case TargetOpcode::G_ICMP: {
1710+
if (Ty.isVector())
1711+
return selectVectorICmp(I, MRI);
1712+
16281713
if (Ty != LLT::scalar(32)) {
16291714
LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
16301715
<< ", expected: " << LLT::scalar(32) << '\n');
@@ -1785,6 +1870,178 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
17851870
return false;
17861871
}
17871872

1873+
/// Select an AArch64 vector compare for a G_ICMP whose result type is a
/// vector. Maps the (element size, element count, predicate) triple to a
/// NEON compare instruction (CMEQ/CMHI/CMHS/CMGT/CMGE) via a static opcode
/// table, commuting operands for the lt/le predicates and emitting a NOT to
/// implement 'ne'.
///
/// G_ICMP operand layout as used here: operand 0 is the destination, operand
/// 1 is the predicate, operands 2 and 3 are the LHS/RHS vectors.
bool AArch64InstructionSelector::selectVectorICmp(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  unsigned DstReg = I.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Src2Reg = I.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
  unsigned NumElts = DstTy.getNumElements();

  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
  // Third index is cc opcode:
  // 0 == eq
  // 1 == ugt
  // 2 == uge
  // 3 == ult
  // 4 == ule
  // 5 == sgt
  // 6 == sge
  // 7 == slt
  // 8 == sle
  // ne is done by negating 'eq' result.

  // This table below assumes that for some comparisons the operands will be
  // commuted.
  // ult op == commute + ugt op
  // ule op == commute + uge op
  // slt op == commute + sgt op
  // sle op == commute + sge op
  unsigned PredIdx = 0;
  bool SwapOperands = false;
  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
  switch (Pred) {
  // NE shares index 0 with EQ; the NOT inversion is handled further down.
  case CmpInst::ICMP_NE:
  case CmpInst::ICMP_EQ:
    PredIdx = 0;
    break;
  case CmpInst::ICMP_UGT:
    PredIdx = 1;
    break;
  case CmpInst::ICMP_UGE:
    PredIdx = 2;
    break;
  case CmpInst::ICMP_ULT:
    PredIdx = 3;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_ULE:
    PredIdx = 4;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SGT:
    PredIdx = 5;
    break;
  case CmpInst::ICMP_SGE:
    PredIdx = 6;
    break;
  case CmpInst::ICMP_SLT:
    PredIdx = 7;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SLE:
    PredIdx = 8;
    SwapOperands = true;
    break;
  default:
    llvm_unreachable("Unhandled icmp predicate");
    return false;
  }

  // This table obviously should be tablegen'd when we have our GISel native
  // tablegen selector.

  // Zero entries mark (element size, element count) combinations with no
  // legal NEON compare (e.g. v2i8, v8i32); selection fails for those below.
  static const unsigned OpcTable[4][4][9] = {
      {
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
           AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
           AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
          {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
           AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
           AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
      },
      {
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
           AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
           AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
          {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
           AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
           AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */}
      },
      {
          {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
           AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
           AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
          {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
           AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
           AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */}
      },
      {
          {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
           AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
           AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */}
      },
  };
  // 8/16/32/64-bit elements -> index 0/1/2/3; 2/4/8/16 elements -> 0/1/2/3.
  unsigned EltIdx = Log2_32(SrcEltSize / 8);
  unsigned NumEltsIdx = Log2_32(NumElts / 2);
  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
  if (!Opc) {
    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
    return false;
  }

  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
  const TargetRegisterClass *SrcRC =
      getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
  if (!SrcRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return false;
  }

  // 'ne' is implemented as 'eq' followed by a bitwise NOT of the result.
  // NotOpc stays 0 for every other predicate, in which case no inversion is
  // emitted. For 128-bit vectors the 16-byte NOT variant is used.
  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
  if (SrcTy.getSizeInBits() == 128)
    NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;

  // Commute operands for the predicates the table maps onto their inverses
  // (ult/ule/slt/sle; see the table comment above).
  if (SwapOperands)
    std::swap(SrcReg, Src2Reg);

  MachineIRBuilder MIB(I);
  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);

  // Invert if we had a 'ne' cc.
  if (NotOpc) {
    Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  } else {
    MIB.buildCopy(DstReg, Cmp.getReg(0));
  }
  // The destination has the same size/bank as the sources, so constraining it
  // to the source register class is valid here.
  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
  I.eraseFromParent();
  return true;
}
2044+
17882045
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
17892046
unsigned EltSize, const TargetRegisterClass *DstRC, unsigned Scalar,
17902047
MachineIRBuilder &MIRBuilder) const {

‎llvm/test/CodeGen/AArch64/GlobalISel/select-vector-icmp.mir

Lines changed: 3350 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
# Tests instruction selection of vector G_SHL and G_ASHR for <2 x s32> and
# <4 x s32>: G_SHL selects to USHL; G_ASHR selects to NEG + SSHL (SSHL with a
# negated shift amount performs a right shift).
---
# <2 x s32> G_SHL -> USHLv2i32 on fpr64.
name:            shl_v2i32
alignment:       2
legalized:       true
regBankSelected: true
tracksRegLiveness: true
registers:
  - { id: 0, class: fpr }
  - { id: 1, class: fpr }
  - { id: 2, class: fpr }
machineFunctionInfo: {}
body:             |
  bb.1:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: shl_v2i32
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
    ; CHECK: [[USHLv2i32_:%[0-9]+]]:fpr64 = USHLv2i32 [[COPY]], [[COPY1]]
    ; CHECK: $d0 = COPY [[USHLv2i32_]]
    ; CHECK: RET_ReallyLR implicit $d0
    %0:fpr(<2 x s32>) = COPY $d0
    %1:fpr(<2 x s32>) = COPY $d1
    %2:fpr(<2 x s32>) = G_SHL %0, %1(<2 x s32>)
    $d0 = COPY %2(<2 x s32>)
    RET_ReallyLR implicit $d0

...
---
# <4 x s32> G_SHL -> USHLv4i32 on fpr128.
name:            shl_v4i32
alignment:       2
legalized:       true
regBankSelected: true
tracksRegLiveness: true
registers:
  - { id: 0, class: fpr }
  - { id: 1, class: fpr }
  - { id: 2, class: fpr }
machineFunctionInfo: {}
body:             |
  bb.1:
    liveins: $q0, $q1

    ; CHECK-LABEL: name: shl_v4i32
    ; CHECK: liveins: $q0, $q1
    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
    ; CHECK: [[USHLv4i32_:%[0-9]+]]:fpr128 = USHLv4i32 [[COPY]], [[COPY1]]
    ; CHECK: $q0 = COPY [[USHLv4i32_]]
    ; CHECK: RET_ReallyLR implicit $q0
    %0:fpr(<4 x s32>) = COPY $q0
    %1:fpr(<4 x s32>) = COPY $q1
    %2:fpr(<4 x s32>) = G_SHL %0, %1(<4 x s32>)
    $q0 = COPY %2(<4 x s32>)
    RET_ReallyLR implicit $q0

...
---
# <2 x s32> G_ASHR -> NEGv2i32 of the shift amount, then SSHLv2i32.
name:            ashr_v2i32
alignment:       2
legalized:       true
regBankSelected: true
tracksRegLiveness: true
registers:
  - { id: 0, class: fpr }
  - { id: 1, class: fpr }
  - { id: 2, class: fpr }
machineFunctionInfo: {}
body:             |
  bb.1:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: ashr_v2i32
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
    ; CHECK: [[NEGv2i32_:%[0-9]+]]:fpr64 = NEGv2i32 [[COPY1]]
    ; CHECK: [[SSHLv2i32_:%[0-9]+]]:fpr64 = SSHLv2i32 [[COPY]], [[NEGv2i32_]]
    ; CHECK: $d0 = COPY [[SSHLv2i32_]]
    ; CHECK: RET_ReallyLR implicit $d0
    %0:fpr(<2 x s32>) = COPY $d0
    %1:fpr(<2 x s32>) = COPY $d1
    %2:fpr(<2 x s32>) = G_ASHR %0, %1(<2 x s32>)
    $d0 = COPY %2(<2 x s32>)
    RET_ReallyLR implicit $d0

...
---
# <4 x s32> G_ASHR -> NEGv4i32 of the shift amount, then SSHLv4i32.
name:            ashr_v4i32
alignment:       2
legalized:       true
regBankSelected: true
tracksRegLiveness: true
registers:
  - { id: 0, class: fpr }
  - { id: 1, class: fpr }
  - { id: 2, class: fpr }
machineFunctionInfo: {}
body:             |
  bb.1:
    liveins: $q0, $q1

    ; CHECK-LABEL: name: ashr_v4i32
    ; CHECK: liveins: $q0, $q1
    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
    ; CHECK: [[NEGv4i32_:%[0-9]+]]:fpr128 = NEGv4i32 [[COPY1]]
    ; CHECK: [[SSHLv4i32_:%[0-9]+]]:fpr128 = SSHLv4i32 [[COPY]], [[NEGv4i32_]]
    ; CHECK: $q0 = COPY [[SSHLv4i32_]]
    ; CHECK: RET_ReallyLR implicit $q0
    %0:fpr(<4 x s32>) = COPY $q0
    %1:fpr(<4 x s32>) = COPY $q1
    %2:fpr(<4 x s32>) = G_ASHR %0, %1(<4 x s32>)
    $q0 = COPY %2(<4 x s32>)
    RET_ReallyLR implicit $q0

...

0 commit comments

Comments
 (0)
Please sign in to comment.