Skip to content

Commit

Permalink
Implement AArch64 Neon instruction set Perm.
Browse files Browse the repository at this point in the history
llvm-svn: 194123
  • Loading branch information
Jiangning Liu committed Nov 6, 2013
1 parent 95d005c commit f4226f1
Showing 6 changed files with 2,649 additions and 0 deletions.
18 changes: 18 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
@@ -1001,6 +1001,24 @@ class NeonI_BitExtract<bit q, bits<2> op2,
// Inherit Rd in 4-0
}

// Format AdvSIMD perm
//
// Encoding layout set by this class:
//   bit  31    : 0
//   bit  30    : q
//   bits 29-24 : 001110
//   bits 23-22 : size
//   bit  21    : 0
//   bit  15    : 0
//   bits 14-12 : opcode
//   bits 11-10 : 10
// The Rm (20-16), Rn (9-5) and Rd (4-0) fields are inherited from the
// A64InstRdnm base class.
class NeonI_Perm<bit q, bits<2> size, bits<3> opcode,
                 dag outs, dag ins, string asmstr,
                 list<dag> patterns, InstrItinClass itin>
  : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
  let Inst{31}    = 0b0;
  let Inst{30}    = q;
  let Inst{29-24} = 0b001110;
  let Inst{23-22} = size;
  let Inst{21}    = 0b0;
  // Bits 20-16: Rm (inherited)
  let Inst{15}    = 0b0;
  let Inst{14-12} = opcode;
  let Inst{11-10} = 0b10;
  // Bits 9-5: Rn (inherited)
  // Bits 4-0: Rd (inherited)
}

// Format AdvSIMD 3 vector registers with same vector type
class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
329 changes: 329 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrNEON.td
Original file line number Diff line number Diff line change
@@ -2360,6 +2360,335 @@ defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
int_aarch64_neon_vminv>;

// The following definitions are for the instruction class (Perm)

// A single permute instruction for one vector arrangement.
//   OpVPR : register operand class used for $Rd, $Rn and $Rm alike.
//   OpS   : arrangement suffix appended to every operand in the asm string.
// The instruction carries an empty pattern list and NoItinerary.
class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
                    string asmop, RegisterOperand OpVPR, string OpS>
  : NeonI_Perm<q, size, opcode,
               (outs OpVPR:$Rd),
               (ins OpVPR:$Rn, OpVPR:$Rm),
               !strconcat(asmop, "\t$Rd.", OpS, ", $Rn.", OpS, ", $Rm.", OpS),
               [], NoItinerary>;

// Expands one permute opcode into a NeonI_Permute record per arrangement.
// q = 0 is paired with VPR64 and q = 1 with VPR128; the 2d arrangement is
// only defined in its q = 1 form.
multiclass NeonI_Perm_pat<bits<3> opcode, string asmop> {
  // size = 0b00
  def _8b  : NeonI_Permute<0b0, 0b00, opcode, asmop, VPR64,  "8b">;
  def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop, VPR128, "16b">;
  // size = 0b01
  def _4h  : NeonI_Permute<0b0, 0b01, opcode, asmop, VPR64,  "4h">;
  def _8h  : NeonI_Permute<0b1, 0b01, opcode, asmop, VPR128, "8h">;
  // size = 0b10
  def _2s  : NeonI_Permute<0b0, 0b10, opcode, asmop, VPR64,  "2s">;
  def _4s  : NeonI_Permute<0b1, 0b10, opcode, asmop, VPR128, "4s">;
  // size = 0b11
  def _2d  : NeonI_Permute<0b1, 0b11, opcode, asmop, VPR128, "2d">;
}

// Permute instruction families, one defm per 3-bit opcode value.
// Each expands to the _8b/_16b/_4h/_8h/_2s/_4s/_2d records of NeonI_Perm_pat.
defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1">;
defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1">;
defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1">;
defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2">;
defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2">;
defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2">;

// Extract and Insert
// NI_ei_i32: read element $Ext of $Rm as an i32 scalar and insert it into
// $Rn at element index $Ins.
def NI_ei_i32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
                        (vector_insert
                          node:$Rn,
                          (i32 (vector_extract node:$Rm, node:$Ext)),
                          node:$Ins)>;

// NI_ei_f32: read element $Ext of $Rm as an f32 scalar and insert it into
// $Rn at element index $Ins.
def NI_ei_f32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
                        (vector_insert
                          node:$Rn,
                          (f32 (vector_extract node:$Rm, node:$Ext)),
                          node:$Ins)>;

// uzp1
// Selects UZP1vvv_16b for a chain of fifteen NI_ei_i32 extract/insert steps
// on v16i8. Each step "(src), Ext, Ins" inserts lane Ext of src into lane Ins
// of the vector built so far. The innermost operand ($Rn) is the starting
// vector, so result lane 0 keeps $Rn lane 0.
//   result lanes 0-7  = $Rn lanes 0, 2, 4, ..., 14
//   result lanes 8-15 = $Rm lanes 0, 2, 4, ..., 14
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 VPR128:$Rn),
(v16i8 VPR128:$Rn), 2, 1)),
(v16i8 VPR128:$Rn), 4, 2)),
(v16i8 VPR128:$Rn), 6, 3)),
(v16i8 VPR128:$Rn), 8, 4)),
(v16i8 VPR128:$Rn), 10, 5)),
(v16i8 VPR128:$Rn), 12, 6)),
(v16i8 VPR128:$Rn), 14, 7)),
(v16i8 VPR128:$Rm), 0, 8)),
(v16i8 VPR128:$Rm), 2, 9)),
(v16i8 VPR128:$Rm), 4, 10)),
(v16i8 VPR128:$Rm), 6, 11)),
(v16i8 VPR128:$Rm), 8, 12)),
(v16i8 VPR128:$Rm), 10, 13)),
(v16i8 VPR128:$Rm), 12, 14)),
(v16i8 VPR128:$Rm), 14, 15)),
(UZP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;

// UZP1 selection pattern for 8-lane vectors of type Ty held in VPR.
// Seven NI_ei_i32 steps starting from $Rn (which supplies result lane 0):
//   result lanes 0-3 = $Rn lanes 0, 2, 4, 6
//   result lanes 4-7 = $Rm lanes 0, 2, 4, 6
// The matched DAG is replaced by (INST $Rn, $Rm).
class NI_Uzp1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty VPR:$Rn),
(Ty VPR:$Rn), 2, 1)),
(Ty VPR:$Rn), 4, 2)),
(Ty VPR:$Rn), 6, 3)),
(Ty VPR:$Rm), 0, 4)),
(Ty VPR:$Rm), 2, 5)),
(Ty VPR:$Rm), 4, 6)),
(Ty VPR:$Rm), 6, 7)),
(INST VPR:$Rn, VPR:$Rm)>;

// v8i8 uses the 64-bit operand class, v8i16 the 128-bit one.
def : NI_Uzp1_v8<v8i8, VPR64, UZP1vvv_8b>;
def : NI_Uzp1_v8<v8i16, VPR128, UZP1vvv_8h>;

// UZP1 selection pattern for 4-lane vectors. `ei` is the extract/insert
// fragment to match (NI_ei_i32 or NI_ei_f32). Starting from $Rn, which
// supplies result lane 0, the three steps produce:
//   result = [ $Rn[0], $Rn[2], $Rm[0], $Rm[2] ]
class NI_Uzp1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
                 PatFrag ei>
  : Pat<(Ty (ei
          (Ty (ei
            (Ty (ei
              (Ty VPR:$Rn),
              (Ty VPR:$Rn), 2, 1)),
            (Ty VPR:$Rm), 0, 2)),
          (Ty VPR:$Rm), 2, 3)),
        (INST VPR:$Rn, VPR:$Rm)>;

def : NI_Uzp1_v4<v4i16, VPR64,  UZP1vvv_4h, NI_ei_i32>;
def : NI_Uzp1_v4<v4i32, VPR128, UZP1vvv_4s, NI_ei_i32>;
def : NI_Uzp1_v4<v4f32, VPR128, UZP1vvv_4s, NI_ei_f32>;

// uzp2
// Selects UZP2vvv_16b for a chain of fifteen NI_ei_i32 extract/insert steps
// on v16i8. Each step "(src), Ext, Ins" inserts lane Ext of src into lane Ins
// of the vector built so far. The innermost operand ($Rm) is the starting
// vector, so result lane 15 keeps $Rm lane 15.
//   result lanes 0-7  = $Rn lanes 1, 3, 5, ..., 15
//   result lanes 8-15 = $Rm lanes 1, 3, 5, ..., 15
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 VPR128:$Rm),
(v16i8 VPR128:$Rn), 1, 0)),
(v16i8 VPR128:$Rn), 3, 1)),
(v16i8 VPR128:$Rn), 5, 2)),
(v16i8 VPR128:$Rn), 7, 3)),
(v16i8 VPR128:$Rn), 9, 4)),
(v16i8 VPR128:$Rn), 11, 5)),
(v16i8 VPR128:$Rn), 13, 6)),
(v16i8 VPR128:$Rn), 15, 7)),
(v16i8 VPR128:$Rm), 1, 8)),
(v16i8 VPR128:$Rm), 3, 9)),
(v16i8 VPR128:$Rm), 5, 10)),
(v16i8 VPR128:$Rm), 7, 11)),
(v16i8 VPR128:$Rm), 9, 12)),
(v16i8 VPR128:$Rm), 11, 13)),
(v16i8 VPR128:$Rm), 13, 14)),
(UZP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;

// UZP2 selection pattern for 8-lane vectors of type Ty held in VPR.
// Seven NI_ei_i32 steps starting from $Rm (which supplies result lane 7):
//   result lanes 0-3 = $Rn lanes 1, 3, 5, 7
//   result lanes 4-7 = $Rm lanes 1, 3, 5, 7
// The matched DAG is replaced by (INST $Rn, $Rm).
class NI_Uzp2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty VPR:$Rm),
(Ty VPR:$Rn), 1, 0)),
(Ty VPR:$Rn), 3, 1)),
(Ty VPR:$Rn), 5, 2)),
(Ty VPR:$Rn), 7, 3)),
(Ty VPR:$Rm), 1, 4)),
(Ty VPR:$Rm), 3, 5)),
(Ty VPR:$Rm), 5, 6)),
(INST VPR:$Rn, VPR:$Rm)>;

// v8i8 uses the 64-bit operand class, v8i16 the 128-bit one.
def : NI_Uzp2_v8<v8i8, VPR64, UZP2vvv_8b>;
def : NI_Uzp2_v8<v8i16, VPR128, UZP2vvv_8h>;

// UZP2 selection pattern for 4-lane vectors. `ei` is the extract/insert
// fragment to match (NI_ei_i32 or NI_ei_f32). Starting from $Rm, which
// supplies result lane 3, the three steps produce:
//   result = [ $Rn[1], $Rn[3], $Rm[1], $Rm[3] ]
class NI_Uzp2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
                 PatFrag ei>
  : Pat<(Ty (ei
          (Ty (ei
            (Ty (ei
              (Ty VPR:$Rm),
              (Ty VPR:$Rn), 1, 0)),
            (Ty VPR:$Rn), 3, 1)),
          (Ty VPR:$Rm), 1, 2)),
        (INST VPR:$Rn, VPR:$Rm)>;

def : NI_Uzp2_v4<v4i16, VPR64,  UZP2vvv_4h, NI_ei_i32>;
def : NI_Uzp2_v4<v4i32, VPR128, UZP2vvv_4s, NI_ei_i32>;
def : NI_Uzp2_v4<v4f32, VPR128, UZP2vvv_4s, NI_ei_f32>;

// zip1
// Selects ZIP1vvv_16b for a chain of fifteen NI_ei_i32 extract/insert steps
// on v16i8. Each step "(src), Ext, Ins" inserts lane Ext of src into lane Ins
// of the vector built so far. The innermost operand ($Rn) is the starting
// vector, so result lane 0 keeps $Rn lane 0.
//   result lane 2*i     = $Rn lane i   (i = 0..7)
//   result lane 2*i + 1 = $Rm lane i   (i = 0..7)
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 VPR128:$Rn),
(v16i8 VPR128:$Rm), 0, 1)),
(v16i8 VPR128:$Rn), 1, 2)),
(v16i8 VPR128:$Rm), 1, 3)),
(v16i8 VPR128:$Rn), 2, 4)),
(v16i8 VPR128:$Rm), 2, 5)),
(v16i8 VPR128:$Rn), 3, 6)),
(v16i8 VPR128:$Rm), 3, 7)),
(v16i8 VPR128:$Rn), 4, 8)),
(v16i8 VPR128:$Rm), 4, 9)),
(v16i8 VPR128:$Rn), 5, 10)),
(v16i8 VPR128:$Rm), 5, 11)),
(v16i8 VPR128:$Rn), 6, 12)),
(v16i8 VPR128:$Rm), 6, 13)),
(v16i8 VPR128:$Rn), 7, 14)),
(v16i8 VPR128:$Rm), 7, 15)),
(ZIP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;

// ZIP1 selection pattern for 8-lane vectors of type Ty held in VPR.
// Seven NI_ei_i32 steps starting from $Rn (which supplies result lane 0):
//   result lane 2*i     = $Rn lane i   (i = 0..3)
//   result lane 2*i + 1 = $Rm lane i   (i = 0..3)
// The matched DAG is replaced by (INST $Rn, $Rm).
class NI_Zip1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty VPR:$Rn),
(Ty VPR:$Rm), 0, 1)),
(Ty VPR:$Rn), 1, 2)),
(Ty VPR:$Rm), 1, 3)),
(Ty VPR:$Rn), 2, 4)),
(Ty VPR:$Rm), 2, 5)),
(Ty VPR:$Rn), 3, 6)),
(Ty VPR:$Rm), 3, 7)),
(INST VPR:$Rn, VPR:$Rm)>;

// v8i8 uses the 64-bit operand class, v8i16 the 128-bit one.
def : NI_Zip1_v8<v8i8, VPR64, ZIP1vvv_8b>;
def : NI_Zip1_v8<v8i16, VPR128, ZIP1vvv_8h>;

// ZIP1 selection pattern for 4-lane vectors. `ei` is the extract/insert
// fragment to match (NI_ei_i32 or NI_ei_f32). Starting from $Rn, which
// supplies result lane 0, the three steps produce:
//   result = [ $Rn[0], $Rm[0], $Rn[1], $Rm[1] ]
class NI_Zip1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
                 PatFrag ei>
  : Pat<(Ty (ei
          (Ty (ei
            (Ty (ei
              (Ty VPR:$Rn),
              (Ty VPR:$Rm), 0, 1)),
            (Ty VPR:$Rn), 1, 2)),
          (Ty VPR:$Rm), 1, 3)),
        (INST VPR:$Rn, VPR:$Rm)>;

def : NI_Zip1_v4<v4i16, VPR64,  ZIP1vvv_4h, NI_ei_i32>;
def : NI_Zip1_v4<v4i32, VPR128, ZIP1vvv_4s, NI_ei_i32>;
def : NI_Zip1_v4<v4f32, VPR128, ZIP1vvv_4s, NI_ei_f32>;

// zip2
// Selects ZIP2vvv_16b for a chain of fifteen NI_ei_i32 extract/insert steps
// on v16i8. Each step "(src), Ext, Ins" inserts lane Ext of src into lane Ins
// of the vector built so far. The innermost operand ($Rm) is the starting
// vector, so result lane 15 keeps $Rm lane 15.
//   result lane 2*i     = $Rn lane 8 + i   (i = 0..7)
//   result lane 2*i + 1 = $Rm lane 8 + i   (i = 0..7)
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 VPR128:$Rm),
(v16i8 VPR128:$Rn), 8, 0)),
(v16i8 VPR128:$Rm), 8, 1)),
(v16i8 VPR128:$Rn), 9, 2)),
(v16i8 VPR128:$Rm), 9, 3)),
(v16i8 VPR128:$Rn), 10, 4)),
(v16i8 VPR128:$Rm), 10, 5)),
(v16i8 VPR128:$Rn), 11, 6)),
(v16i8 VPR128:$Rm), 11, 7)),
(v16i8 VPR128:$Rn), 12, 8)),
(v16i8 VPR128:$Rm), 12, 9)),
(v16i8 VPR128:$Rn), 13, 10)),
(v16i8 VPR128:$Rm), 13, 11)),
(v16i8 VPR128:$Rn), 14, 12)),
(v16i8 VPR128:$Rm), 14, 13)),
(v16i8 VPR128:$Rn), 15, 14)),
(ZIP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;

// ZIP2 selection pattern for 8-lane vectors of type Ty held in VPR.
// Seven NI_ei_i32 steps starting from $Rm (which supplies result lane 7):
//   result lane 2*i     = $Rn lane 4 + i   (i = 0..3)
//   result lane 2*i + 1 = $Rm lane 4 + i   (i = 0..3)
// The matched DAG is replaced by (INST $Rn, $Rm).
class NI_Zip2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty VPR:$Rm),
(Ty VPR:$Rn), 4, 0)),
(Ty VPR:$Rm), 4, 1)),
(Ty VPR:$Rn), 5, 2)),
(Ty VPR:$Rm), 5, 3)),
(Ty VPR:$Rn), 6, 4)),
(Ty VPR:$Rm), 6, 5)),
(Ty VPR:$Rn), 7, 6)),
(INST VPR:$Rn, VPR:$Rm)>;

// v8i8 uses the 64-bit operand class, v8i16 the 128-bit one.
def : NI_Zip2_v8<v8i8, VPR64, ZIP2vvv_8b>;
def : NI_Zip2_v8<v8i16, VPR128, ZIP2vvv_8h>;

// ZIP2 selection pattern for 4-lane vectors. `ei` is the extract/insert
// fragment to match (NI_ei_i32 or NI_ei_f32). Starting from $Rm, which
// supplies result lane 3, the three steps produce:
//   result = [ $Rn[2], $Rm[2], $Rn[3], $Rm[3] ]
class NI_Zip2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
                 PatFrag ei>
  : Pat<(Ty (ei
          (Ty (ei
            (Ty (ei
              (Ty VPR:$Rm),
              (Ty VPR:$Rn), 2, 0)),
            (Ty VPR:$Rm), 2, 1)),
          (Ty VPR:$Rn), 3, 2)),
        (INST VPR:$Rn, VPR:$Rm)>;

def : NI_Zip2_v4<v4i16, VPR64,  ZIP2vvv_4h, NI_ei_i32>;
def : NI_Zip2_v4<v4i32, VPR128, ZIP2vvv_4s, NI_ei_i32>;
def : NI_Zip2_v4<v4f32, VPR128, ZIP2vvv_4s, NI_ei_f32>;

// trn1
// Selects TRN1vvv_16b for a chain of eight NI_ei_i32 extract/insert steps on
// v16i8. Each step "(src), Ext, Ins" inserts lane Ext of src into lane Ins of
// the vector built so far. The innermost operand ($Rn) is the starting
// vector; its even lanes are never overwritten.
//   result lane 2*i     = $Rn lane 2*i   (kept from the starting vector)
//   result lane 2*i + 1 = $Rm lane 2*i   (i = 0..7)
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 VPR128:$Rn),
(v16i8 VPR128:$Rm), 0, 1)),
(v16i8 VPR128:$Rm), 2, 3)),
(v16i8 VPR128:$Rm), 4, 5)),
(v16i8 VPR128:$Rm), 6, 7)),
(v16i8 VPR128:$Rm), 8, 9)),
(v16i8 VPR128:$Rm), 10, 11)),
(v16i8 VPR128:$Rm), 12, 13)),
(v16i8 VPR128:$Rm), 14, 15)),
(TRN1vvv_16b VPR128:$Rn, VPR128:$Rm)>;

// TRN1 selection pattern for 8-lane vectors of type Ty held in VPR.
// Four NI_ei_i32 steps starting from $Rn, whose even lanes are kept:
//   result lane 2*i     = $Rn lane 2*i   (kept from the starting vector)
//   result lane 2*i + 1 = $Rm lane 2*i   (i = 0..3)
// The matched DAG is replaced by (INST $Rn, $Rm).
class NI_Trn1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty VPR:$Rn),
(Ty VPR:$Rm), 0, 1)),
(Ty VPR:$Rm), 2, 3)),
(Ty VPR:$Rm), 4, 5)),
(Ty VPR:$Rm), 6, 7)),
(INST VPR:$Rn, VPR:$Rm)>;

// v8i8 uses the 64-bit operand class, v8i16 the 128-bit one.
def : NI_Trn1_v8<v8i8, VPR64, TRN1vvv_8b>;
def : NI_Trn1_v8<v8i16, VPR128, TRN1vvv_8h>;

// TRN1 selection pattern for 4-lane vectors. `ei` is the extract/insert
// fragment to match (NI_ei_i32 or NI_ei_f32). Starting from $Rn, whose even
// lanes are kept, the two steps produce:
//   result = [ $Rn[0], $Rm[0], $Rn[2], $Rm[2] ]
class NI_Trn1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
                 PatFrag ei>
  : Pat<(Ty (ei
          (Ty (ei
            (Ty VPR:$Rn),
            (Ty VPR:$Rm), 0, 1)),
          (Ty VPR:$Rm), 2, 3)),
        (INST VPR:$Rn, VPR:$Rm)>;

def : NI_Trn1_v4<v4i16, VPR64,  TRN1vvv_4h, NI_ei_i32>;
def : NI_Trn1_v4<v4i32, VPR128, TRN1vvv_4s, NI_ei_i32>;
def : NI_Trn1_v4<v4f32, VPR128, TRN1vvv_4s, NI_ei_f32>;

// trn2
// Selects TRN2vvv_16b for a chain of eight NI_ei_i32 extract/insert steps on
// v16i8. Each step "(src), Ext, Ins" inserts lane Ext of src into lane Ins of
// the vector built so far. The innermost operand ($Rm) is the starting
// vector; its odd lanes are never overwritten.
//   result lane 2*i     = $Rn lane 2*i + 1   (i = 0..7)
//   result lane 2*i + 1 = $Rm lane 2*i + 1   (kept from the starting vector)
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 VPR128:$Rm),
(v16i8 VPR128:$Rn), 1, 0)),
(v16i8 VPR128:$Rn), 3, 2)),
(v16i8 VPR128:$Rn), 5, 4)),
(v16i8 VPR128:$Rn), 7, 6)),
(v16i8 VPR128:$Rn), 9, 8)),
(v16i8 VPR128:$Rn), 11, 10)),
(v16i8 VPR128:$Rn), 13, 12)),
(v16i8 VPR128:$Rn), 15, 14)),
(TRN2vvv_16b VPR128:$Rn, VPR128:$Rm)>;

// TRN2 selection pattern for 8-lane vectors of type Ty held in VPR.
// Four NI_ei_i32 steps starting from $Rm, whose odd lanes are kept:
//   result lane 2*i     = $Rn lane 2*i + 1   (i = 0..3)
//   result lane 2*i + 1 = $Rm lane 2*i + 1   (kept from the starting vector)
// The matched DAG is replaced by (INST $Rn, $Rm).
class NI_Trn2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty VPR:$Rm),
(Ty VPR:$Rn), 1, 0)),
(Ty VPR:$Rn), 3, 2)),
(Ty VPR:$Rn), 5, 4)),
(Ty VPR:$Rn), 7, 6)),
(INST VPR:$Rn, VPR:$Rm)>;

// v8i8 uses the 64-bit operand class, v8i16 the 128-bit one.
def : NI_Trn2_v8<v8i8, VPR64, TRN2vvv_8b>;
def : NI_Trn2_v8<v8i16, VPR128, TRN2vvv_8h>;

// TRN2 selection pattern for 4-lane vectors. `ei` is the extract/insert
// fragment to match (NI_ei_i32 or NI_ei_f32). Starting from $Rm, whose odd
// lanes are kept, the two steps produce:
//   result = [ $Rn[1], $Rm[1], $Rn[3], $Rm[3] ]
class NI_Trn2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
                 PatFrag ei>
  : Pat<(Ty (ei
          (Ty (ei
            (Ty VPR:$Rm),
            (Ty VPR:$Rn), 1, 0)),
          (Ty VPR:$Rn), 3, 2)),
        (INST VPR:$Rn, VPR:$Rm)>;

def : NI_Trn2_v4<v4i16, VPR64,  TRN2vvv_4h, NI_ei_i32>;
def : NI_Trn2_v4<v4i32, VPR128, TRN2vvv_4s, NI_ei_i32>;
def : NI_Trn2_v4<v4f32, VPR128, TRN2vvv_4s, NI_ei_f32>;

// End of implementation for instruction class (Perm)

// The following definitions are for the instruction class (3V Diff)

// normal long/long2 pattern
Loading

0 comments on commit f4226f1

Please sign in to comment.