@@ -67,6 +67,9 @@ class AArch64InstructionSelector : public InstructionSelector {
   bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                            MachineRegisterInfo &MRI) const;

+  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
   // Helper to generate an equivalent of scalar_to_vector into a new register,
   // returned via 'Dst'.
   MachineInstr *emitScalarToVector(unsigned EltSize,
@@ -98,6 +101,7 @@ class AArch64InstructionSelector : public InstructionSelector {
                                      MachineRegisterInfo &MRI) const;
   bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                       MachineRegisterInfo &MRI) const;
+  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;

   unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
   MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
@@ -824,6 +828,77 @@ bool AArch64InstructionSelector::selectCompareBranch(
   return true;
 }

+bool AArch64InstructionSelector::selectVectorSHL(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_SHL);
+  unsigned DstReg = I.getOperand(0).getReg();
+  const LLT Ty = MRI.getType(DstReg);
+  unsigned Src1Reg = I.getOperand(1).getReg();
+  unsigned Src2Reg = I.getOperand(2).getReg();
+
+  if (!Ty.isVector())
+    return false;
+
+  unsigned Opc = 0;
+  const TargetRegisterClass *RC = nullptr;
+  if (Ty == LLT::vector(4, 32)) {
+    Opc = AArch64::USHLv4i32;
+    RC = &AArch64::FPR128RegClass;
+  } else if (Ty == LLT::vector(2, 32)) {
+    Opc = AArch64::USHLv2i32;
+    RC = &AArch64::FPR64RegClass;
+  } else {
+    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
+    return false;
+  }
+
+  MachineIRBuilder MIB(I);
+  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
+  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
+}
+
+bool AArch64InstructionSelector::selectVectorASHR(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_ASHR);
+  unsigned DstReg = I.getOperand(0).getReg();
+  const LLT Ty = MRI.getType(DstReg);
+  unsigned Src1Reg = I.getOperand(1).getReg();
+  unsigned Src2Reg = I.getOperand(2).getReg();
+
+  if (!Ty.isVector())
+    return false;
+
+  // There is not a shift right register instruction, but the shift left
+  // register instruction takes a signed value, where negative numbers specify a
+  // right shift.
+
+  unsigned Opc = 0;
+  unsigned NegOpc = 0;
+  const TargetRegisterClass *RC = nullptr;
+  if (Ty == LLT::vector(4, 32)) {
+    Opc = AArch64::SSHLv4i32;
+    NegOpc = AArch64::NEGv4i32;
+    RC = &AArch64::FPR128RegClass;
+  } else if (Ty == LLT::vector(2, 32)) {
+    Opc = AArch64::SSHLv2i32;
+    NegOpc = AArch64::NEGv2i32;
+    RC = &AArch64::FPR64RegClass;
+  } else {
+    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
+    return false;
+  }
+
+  MachineIRBuilder MIB(I);
+  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
+  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
+  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
+  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
+}
+
 bool AArch64InstructionSelector::selectVaStartAAPCS(
     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
   return false;
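A note on the G_ASHR lowering above: AdvSIMD has no vector shift-right-by-register instruction, so the selector negates the per-lane shift amounts (NEG) and feeds them to the signed shift-left-by-register instruction (SSHL), which shifts right when the amount is negative. The sketch below only illustrates that per-lane semantics on the host; the helper name, lane loop, and values are invented for the example, and it assumes `>>` on a negative `int32_t` behaves as an arithmetic shift (implementation-defined in C++, but true on mainstream compilers).

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical helper: emulates one lane of AArch64 SSHL, which shifts left
// by a signed amount and shifts right for negative amounts.
static int32_t SSHLLane(int32_t Val, int32_t Amt) {
  // Assumes arithmetic '>>' on signed values (holds on common compilers).
  return Amt >= 0 ? Val << Amt : Val >> -Amt;
}

int main() {
  int32_t Src[4] = {-64, 64, -7, 1024}; // example <4 x s32> source lanes
  int32_t Shift[4] = {2, 2, 1, 3};      // example per-lane shift amounts
  for (int I = 0; I != 4; ++I) {
    // G_ASHR %src, %amt  ==>  %neg = NEG %amt ; SSHL %src, %neg
    int32_t Res = SSHLLane(Src[I], -Shift[I]);
    printf("%d >> %d = %d\n", Src[I], Shift[I], Res);
  }
  return 0;
}
```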
@@ -1318,10 +1393,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
   case TargetOpcode::G_FMUL:
   case TargetOpcode::G_FDIV:

-  case TargetOpcode::G_OR:
+  case TargetOpcode::G_ASHR:
+    if (MRI.getType(I.getOperand(0).getReg()).isVector())
+      return selectVectorASHR(I, MRI);
+    LLVM_FALLTHROUGH;
   case TargetOpcode::G_SHL:
+    if (Opcode == TargetOpcode::G_SHL &&
+        MRI.getType(I.getOperand(0).getReg()).isVector())
+      return selectVectorSHL(I, MRI);
+    LLVM_FALLTHROUGH;
+  case TargetOpcode::G_OR:
   case TargetOpcode::G_LSHR:
-  case TargetOpcode::G_ASHR:
   case TargetOpcode::G_GEP: {
     // Reject the various things we don't support yet.
     if (unsupportedBinOp(I, RBI, MRI, TRI))
@@ -1625,6 +1707,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     return true;
   }
   case TargetOpcode::G_ICMP: {
+    if (Ty.isVector())
+      return selectVectorICmp(I, MRI);
+
     if (Ty != LLT::scalar(32)) {
       LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
                         << ", expected: " << LLT::scalar(32) << '\n');
@@ -1785,6 +1870,178 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
   return false;
 }

+bool AArch64InstructionSelector::selectVectorICmp(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  unsigned DstReg = I.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  unsigned SrcReg = I.getOperand(2).getReg();
+  unsigned Src2Reg = I.getOperand(3).getReg();
+  LLT SrcTy = MRI.getType(SrcReg);
+
+  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
+  unsigned NumElts = DstTy.getNumElements();
+
+  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
+  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
+  // Third index is cc opcode:
+  // 0 == eq
+  // 1 == ugt
+  // 2 == uge
+  // 3 == ult
+  // 4 == ule
+  // 5 == sgt
+  // 6 == sge
+  // 7 == slt
+  // 8 == sle
+  // ne is done by negating 'eq' result.
+
+  // This table below assumes that for some comparisons the operands will be
+  // commuted.
+  // ult op == commute + ugt op
+  // ule op == commute + uge op
+  // slt op == commute + sgt op
+  // sle op == commute + sge op
+  unsigned PredIdx = 0;
+  bool SwapOperands = false;
+  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
+  switch (Pred) {
+  case CmpInst::ICMP_NE:
+  case CmpInst::ICMP_EQ:
+    PredIdx = 0;
+    break;
+  case CmpInst::ICMP_UGT:
+    PredIdx = 1;
+    break;
+  case CmpInst::ICMP_UGE:
+    PredIdx = 2;
+    break;
+  case CmpInst::ICMP_ULT:
+    PredIdx = 3;
+    SwapOperands = true;
+    break;
+  case CmpInst::ICMP_ULE:
+    PredIdx = 4;
+    SwapOperands = true;
+    break;
+  case CmpInst::ICMP_SGT:
+    PredIdx = 5;
+    break;
+  case CmpInst::ICMP_SGE:
+    PredIdx = 6;
+    break;
+  case CmpInst::ICMP_SLT:
+    PredIdx = 7;
+    SwapOperands = true;
+    break;
+  case CmpInst::ICMP_SLE:
+    PredIdx = 8;
+    SwapOperands = true;
+    break;
+  default:
+    llvm_unreachable("Unhandled icmp predicate");
+    return false;
+  }
+
+  // This table obviously should be tablegen'd when we have our GISel native
+  // tablegen selector.
+
+  static const unsigned OpcTable[4][4][9] = {
+      {
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
+           AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
+           AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
+          {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
+           AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
+           AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
+      },
+      {
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
+           AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
+           AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
+          {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
+           AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
+           AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */}
+      },
+      {
+          {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
+           AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
+           AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
+          {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
+           AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
+           AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */}
+      },
+      {
+          {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
+           AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
+           AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */}
+      },
+  };
+  unsigned EltIdx = Log2_32(SrcEltSize / 8);
+  unsigned NumEltsIdx = Log2_32(NumElts / 2);
+  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
+  if (!Opc) {
+    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
+    return false;
+  }
+
+  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
+  const TargetRegisterClass *SrcRC =
+      getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
+  if (!SrcRC) {
+    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
+    return false;
+  }
+
+  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
+  if (SrcTy.getSizeInBits() == 128)
+    NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
+
+  if (SwapOperands)
+    std::swap(SrcReg, Src2Reg);
+
+  MachineIRBuilder MIB(I);
+  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
+  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+
+  // Invert if we had a 'ne' cc.
+  if (NotOpc) {
+    Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
+    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+  } else {
+    MIB.buildCopy(DstReg, Cmp.getReg(0));
+  }
+  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
+  I.eraseFromParent();
+  return true;
+}
+
 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
     unsigned EltSize, const TargetRegisterClass *DstRC, unsigned Scalar,
     MachineIRBuilder &MIRBuilder) const {
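To make the OpcTable indexing in selectVectorICmp concrete: the first index is log2 of the element size in bytes, the second is log2 of (lane count / 2), and the third is the predicate slot, where ult/ule/slt/sle reuse the ugt/uge/sgt/sge opcodes with SwapOperands set, and ne reuses the eq opcode followed by a NOT. The snippet below merely recomputes those indices for a few example types; the helper names are invented for the illustration and are not part of the patch.

```cpp
#include <cstdio>

// Hypothetical stand-ins for the index math in selectVectorICmp.
static unsigned log2u(unsigned V) {
  unsigned R = 0;
  while (V >>= 1)
    ++R;
  return R;
}
static unsigned eltIdx(unsigned EltBits) { return log2u(EltBits / 8); } // 8b->0 ... 64b->3
static unsigned numEltsIdx(unsigned Lanes) { return log2u(Lanes / 2); } // v2->0 ... v16->3

int main() {
  // <4 x s32> sgt: OpcTable[2][1][5], i.e. CMGTv4i32 with operands in order.
  printf("v4i32 sgt -> OpcTable[%u][%u][5]\n", eltIdx(32), numEltsIdx(4));
  // <8 x s16> ult: slot 3 holds the ugt opcode (CMHIv8i16); SwapOperands
  // commutes the inputs so that "a < b" is selected as "b > a".
  printf("v8i16 ult -> OpcTable[%u][%u][3] (operands swapped)\n", eltIdx(16), numEltsIdx(8));
  // <2 x s64> ne: slot 0 (CMEQv2i64) followed by NOTv16i8 on the 128-bit result.
  printf("v2i64 ne  -> OpcTable[%u][%u][0] + NOT\n", eltIdx(64), numEltsIdx(2));
  return 0;
}
```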