Add POWER9 (Power ISA 3.0) Altivec instruction definitions to the PowerPC
backend.

  * PPC.td: new subtarget feature "power9-altivec" (implies FeatureP8Altivec).
  * PPCInstrAltivec.td: HasP9Altivec predicate and the new instruction defs.
    All pattern lists are empty, so these are assembler/disassembler-only.
  * PPCInstrFormats.td: new VXForm_RD5_XO5_RS5 format, which places a 5-bit
    extended opcode in instruction bits 11-15.
  * PPCSubtarget.{h,cpp}: HasP9Altivec flag (initialised to false) + accessor.
  * test/MC: encoding tests for the assembler (BE and LE) and disassembler.

Index: lib/Target/PowerPC/PPC.td
===================================================================
--- lib/Target/PowerPC/PPC.td
+++ lib/Target/PowerPC/PPC.td
@@ -124,6 +124,9 @@
 def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
                                        "Enable POWER8 vector instructions",
                                        [FeatureVSX, FeatureP8Altivec]>;
+def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true",
+                                        "Enable POWER9 Altivec instructions",
+                                        [FeatureP8Altivec]>;
 def FeatureDirectMove :
   SubtargetFeature<"direct-move", "HasDirectMove", "true",
                    "Enable Power8 direct move instructions",
Index: lib/Target/PowerPC/PPCInstrAltivec.td
===================================================================
--- lib/Target/PowerPC/PPCInstrAltivec.td
+++ lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1213,3 +1213,74 @@
                               int_ppc_altivec_crypto_vncipherlast, v2i64>;
 def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
 } // HasP8Crypto
+
+// The following altivec instructions were introduced in Power ISA 3.0
+def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">;
+let Predicates = [HasP9Altivec] in {
+
+class VX_VT5_EO5_VB5<bits<11> xo, bits<5> eo, string opc, list<dag> pattern>
+  : VXForm_RD5_XO5_RS5<xo, eo, (outs vrrc:$vD), (ins vrrc:$vB),
+                       !strconcat(opc, " $vD, $vB"), IIC_VecGeneral, pattern>;
+
+// Vector Count Leading/Trailing Zero LSB. Dest register is GPR.
+def VCLZLSBB : VXForm_RD5_XO5_RS5<1538, 0, (outs g8rc:$rD), (ins vrrc:$vB),
+                                  "vclzlsbb $rD, $vB", IIC_VecGeneral, []>;
+def VCTZLSBB : VXForm_RD5_XO5_RS5<1538, 1, (outs g8rc:$rD), (ins vrrc:$vB),
+                                  "vctzlsbb $rD, $vB", IIC_VecGeneral, []>;
+// Vector Count Trailing Zeros
+// TODO: map to llvm cttz?
+def VCTZB : VX_VT5_EO5_VB5<1538, 28, "vctzb", []>;
+def VCTZH : VX_VT5_EO5_VB5<1538, 29, "vctzh", []>;
+def VCTZW : VX_VT5_EO5_VB5<1538, 30, "vctzw", []>;
+def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd", []>;
+
+// Vector Extend Sign
+// TODO: map to llvm sext?
+def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", []>; // v4i8 -> v4i32
+def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w", []>; // v4i16 -> v4i32
+def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", []>; // v2i8 -> v2i64
+def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", []>; // v2i16 -> v2i64
+def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", []>; // v2i32 -> v2i64
+
+// Vector Integer Negate
+// TODO: map to llvm ineg?
+def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw", []>;
+def VNEGD : VX_VT5_EO5_VB5<1538, 7, "vnegd", []>;
+
+// Vector Parity Byte
+def VPRTYBW : VX_VT5_EO5_VB5<1538, 8, "vprtybw", []>;
+def VPRTYBD : VX_VT5_EO5_VB5<1538, 9, "vprtybd", []>;
+def VPRTYBQ : VX_VT5_EO5_VB5<1538, 10, "vprtybq", []>;
+
+// Vector Bit Permute (Right-indexed)
+def VBPERMD : VXForm_1<1484, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                       "vbpermd $vD, $vA, $vB", IIC_VecFP, []>;
+// TODO: use VA1a_Int_Ty3 for intrinsic support
+def VPERMR : VAForm_1a<59, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
+                       "vpermr $vD, $vA, $vB, $vC", IIC_VecFP, []>;
+
+// TODO: replace VX1_VT5_VA5_VB5 with VX1_Int_Ty for intrinsic support
+class VX1_VT5_VA5_VB5<bits<11> xo, string opc, list<dag> pattern>
+  : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+             !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern>;
+
+// Vector Rotate Left Mask/Mask-Insert
+def VRLWNM : VX1_VT5_VA5_VB5<389, "vrlwnm", []>;
+def VRLWMI : VX1_VT5_VA5_VB5<133, "vrlwmi", []>;
+def VRLDNM : VX1_VT5_VA5_VB5<453, "vrldnm", []>;
+def VRLDMI : VX1_VT5_VA5_VB5<197, "vrldmi", []>;
+
+// Vector Shift Left/Right
+def VSLV : VX1_VT5_VA5_VB5<1860, "vslv", []>;
+def VSRV : VX1_VT5_VA5_VB5<1796, "vsrv", []>;
+
+// Vector Multiply-by-10 (& Write Carry) Unsigned Quadword
+def VMUL10UQ : VXForm_BX<513, (outs vrrc:$vD), (ins vrrc:$vA),
+                         "vmul10uq $vD, $vA", IIC_VecFP, []>;
+def VMUL10CUQ : VXForm_BX< 1, (outs vrrc:$vD), (ins vrrc:$vA),
+                         "vmul10cuq $vD, $vA", IIC_VecFP, []>;
+
+// Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword
+def VMUL10EUQ : VX1_VT5_VA5_VB5<577, "vmul10euq" , []>;
+def VMUL10ECUQ : VX1_VT5_VA5_VB5< 65, "vmul10ecuq", []>;
+} // end HasP9Altivec
Index: lib/Target/PowerPC/PPCInstrFormats.td
===================================================================
--- lib/Target/PowerPC/PPCInstrFormats.td
+++ lib/Target/PowerPC/PPCInstrFormats.td
@@ -1571,6 +1571,21 @@
   let Inst{21-31} = xo;
 }
 
+/// e.g. [PO, VRT, EO, VRB, XO]
+class VXForm_RD5_XO5_RS5<bits<11> xo, bits<5> eo, dag OOL, dag IOL,
+                         string asmstr, InstrItinClass itin, list<dag> pattern>
+  : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> RD;
+  bits<5> VB;
+
+  let Pattern = pattern;
+
+  let Inst{6-10} = RD;
+  let Inst{11-15} = eo;
+  let Inst{16-20} = VB;
+  let Inst{21-31} = xo;
+}
+
 /// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX"
 class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr,
                InstrItinClass itin, list<dag> pattern>
Index: lib/Target/PowerPC/PPCSubtarget.h
===================================================================
--- lib/Target/PowerPC/PPCSubtarget.h
+++ lib/Target/PowerPC/PPCSubtarget.h
@@ -92,6 +92,7 @@
   bool HasP8Vector;
   bool HasP8Altivec;
   bool HasP8Crypto;
+  bool HasP9Altivec;
   bool HasFCPSGN;
   bool HasFSQRT;
   bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -229,6 +230,7 @@
   bool hasP8Vector() const { return HasP8Vector; }
   bool hasP8Altivec() const { return HasP8Altivec; }
   bool hasP8Crypto() const { return HasP8Crypto; }
+  bool hasP9Altivec() const { return HasP9Altivec; }
   bool hasMFOCRF() const { return HasMFOCRF; }
   bool hasISEL() const { return HasISEL; }
   bool hasPOPCNTD() const { return HasPOPCNTD; }
Index: lib/Target/PowerPC/PPCSubtarget.cpp
===================================================================
--- lib/Target/PowerPC/PPCSubtarget.cpp
+++ lib/Target/PowerPC/PPCSubtarget.cpp
@@ -70,6 +70,7 @@
   HasP8Vector = false;
   HasP8Altivec = false;
   HasP8Crypto = false;
+  HasP9Altivec = false;
   HasFCPSGN = false;
   HasFSQRT = false;
   HasFRE = false;
Index: test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
===================================================================
--- test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
+++ test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
@@ -672,3 +672,88 @@
 # CHECK: mfvscr 2
 0x10 0x40 0x06 0x04
 
+# Power9 instructions
+
+# CHECK: vclzlsbb 2, 3
+0x10 0x40 0x1e 0x02
+
+# CHECK: vctzlsbb 2, 3
+0x10 0x41 0x1e 0x02
+
+# CHECK: vctzb 2, 3
+0x10 0x5c 0x1e 0x02
+
+# CHECK: vctzh 2, 3
+0x10 0x5d 0x1e 0x02
+
+# CHECK: vctzw 2, 3
+0x10 0x5e 0x1e 0x02
+
+# CHECK: vctzd 2, 3
+0x10 0x5f 0x1e 0x02
+
+# CHECK: vextsb2w 2, 3
+0x10 0x50 0x1e 0x02
+
+# CHECK: vextsh2w 2, 3
+0x10 0x51 0x1e 0x02
+
+# CHECK: vextsb2d 2, 3
+0x10 0x58 0x1e 0x02
+
+# CHECK: vextsh2d 2, 3
+0x10 0x59 0x1e 0x02
+
+# CHECK: vextsw2d 2, 3
+0x10 0x5a 0x1e 0x02
+
+# CHECK: vnegw 2, 3
+0x10 0x46 0x1e 0x02
+
+# CHECK: vnegd 2, 3
+0x10 0x47 0x1e 0x02
+
+# CHECK: vprtybw 2, 3
+0x10 0x48 0x1e 0x02
+
+# CHECK: vprtybd 2, 3
+0x10 0x49 0x1e 0x02
+
+# CHECK: vprtybq 2, 3
+0x10 0x4a 0x1e 0x02
+
+# CHECK: vbpermd 2, 5, 17
+0x10 0x45 0x8d 0xcc
+
+# CHECK: vpermr 2, 3, 4, 5
+0x10 0x43 0x21 0x7b
+
+# CHECK: vrlwnm 2, 3, 4
+0x10 0x43 0x21 0x85
+
+# CHECK: vrlwmi 2, 3, 4
+0x10 0x43 0x20 0x85
+
+# CHECK: vrldnm 2, 3, 4
+0x10 0x43 0x21 0xc5
+
+# CHECK: vrldmi 2, 3, 4
+0x10 0x43 0x20 0xc5
+
+# CHECK: vslv 2, 3, 4
+0x10 0x43 0x27 0x44
+
+# CHECK: vsrv 2, 3, 4
+0x10 0x43 0x27 0x04
+
+# CHECK: vmul10uq 2, 3
+0x10 0x43 0x02 0x01
+
+# CHECK: vmul10cuq 2, 3
+0x10 0x43 0x00 0x01
+
+# CHECK: vmul10euq 2, 3, 4
+0x10 0x43 0x22 0x41
+
+# CHECK: vmul10ecuq 2, 3, 4
+0x10 0x43 0x20 0x41
Index: test/MC/PowerPC/ppc64-encoding-vmx.s
===================================================================
--- test/MC/PowerPC/ppc64-encoding-vmx.s
+++ test/MC/PowerPC/ppc64-encoding-vmx.s
@@ -742,3 +742,105 @@
 # CHECK-LE: mfvscr 2 # encoding: [0x04,0x06,0x40,0x10]
             mfvscr 2
 
+# Power9 instructions
+
+# Vector Count Trailing Zeros
+# CHECK-BE: vctzb 2, 3 # encoding: [0x10,0x5c,0x1e,0x02]
+# CHECK-LE: vctzb 2, 3 # encoding: [0x02,0x1e,0x5c,0x10]
+            vctzb 2, 3
+# CHECK-BE: vctzh 2, 3 # encoding: [0x10,0x5d,0x1e,0x02]
+# CHECK-LE: vctzh 2, 3 # encoding: [0x02,0x1e,0x5d,0x10]
+            vctzh 2, 3
+# CHECK-BE: vctzw 2, 3 # encoding: [0x10,0x5e,0x1e,0x02]
+# CHECK-LE: vctzw 2, 3 # encoding: [0x02,0x1e,0x5e,0x10]
+            vctzw 2, 3
+# CHECK-BE: vctzd 2, 3 # encoding: [0x10,0x5f,0x1e,0x02]
+# CHECK-LE: vctzd 2, 3 # encoding: [0x02,0x1e,0x5f,0x10]
+            vctzd 2, 3
+
+# CHECK-BE: vclzlsbb 2, 3 # encoding: [0x10,0x40,0x1e,0x02]
+# CHECK-LE: vclzlsbb 2, 3 # encoding: [0x02,0x1e,0x40,0x10]
+            vclzlsbb 2, 3
+# CHECK-BE: vctzlsbb 2, 3 # encoding: [0x10,0x41,0x1e,0x02]
+# CHECK-LE: vctzlsbb 2, 3 # encoding: [0x02,0x1e,0x41,0x10]
+            vctzlsbb 2, 3
+
+# Vector Extend Sign
+# CHECK-BE: vextsb2w 2, 3 # encoding: [0x10,0x50,0x1e,0x02]
+# CHECK-LE: vextsb2w 2, 3 # encoding: [0x02,0x1e,0x50,0x10]
+            vextsb2w 2, 3
+# CHECK-BE: vextsh2w 2, 3 # encoding: [0x10,0x51,0x1e,0x02]
+# CHECK-LE: vextsh2w 2, 3 # encoding: [0x02,0x1e,0x51,0x10]
+            vextsh2w 2, 3
+# CHECK-BE: vextsb2d 2, 3 # encoding: [0x10,0x58,0x1e,0x02]
+# CHECK-LE: vextsb2d 2, 3 # encoding: [0x02,0x1e,0x58,0x10]
+            vextsb2d 2, 3
+# CHECK-BE: vextsh2d 2, 3 # encoding: [0x10,0x59,0x1e,0x02]
+# CHECK-LE: vextsh2d 2, 3 # encoding: [0x02,0x1e,0x59,0x10]
+            vextsh2d 2, 3
+# CHECK-BE: vextsw2d 2, 3 # encoding: [0x10,0x5a,0x1e,0x02]
+# CHECK-LE: vextsw2d 2, 3 # encoding: [0x02,0x1e,0x5a,0x10]
+            vextsw2d 2, 3
+
+# Vector Integer Negate
+# CHECK-BE: vnegw 2, 3 # encoding: [0x10,0x46,0x1e,0x02]
+# CHECK-LE: vnegw 2, 3 # encoding: [0x02,0x1e,0x46,0x10]
+            vnegw 2, 3
+# CHECK-BE: vnegd 2, 3 # encoding: [0x10,0x47,0x1e,0x02]
+# CHECK-LE: vnegd 2, 3 # encoding: [0x02,0x1e,0x47,0x10]
+            vnegd 2, 3
+
+# Vector Parity Byte
+# CHECK-BE: vprtybw 2, 3 # encoding: [0x10,0x48,0x1e,0x02]
+# CHECK-LE: vprtybw 2, 3 # encoding: [0x02,0x1e,0x48,0x10]
+            vprtybw 2, 3
+# CHECK-BE: vprtybd 2, 3 # encoding: [0x10,0x49,0x1e,0x02]
+# CHECK-LE: vprtybd 2, 3 # encoding: [0x02,0x1e,0x49,0x10]
+            vprtybd 2, 3
+# CHECK-BE: vprtybq 2, 3 # encoding: [0x10,0x4a,0x1e,0x02]
+# CHECK-LE: vprtybq 2, 3 # encoding: [0x02,0x1e,0x4a,0x10]
+            vprtybq 2, 3
+
+# Vector (Bit) Permute (Right-indexed)
+# CHECK-BE: vbpermd 2, 5, 17 # encoding: [0x10,0x45,0x8d,0xcc]
+# CHECK-LE: vbpermd 2, 5, 17 # encoding: [0xcc,0x8d,0x45,0x10]
+            vbpermd 2, 5, 17
+# CHECK-BE: vpermr 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x7b]
+# CHECK-LE: vpermr 2, 3, 4, 5 # encoding: [0x7b,0x21,0x43,0x10]
+            vpermr 2, 3, 4, 5
+
+# Vector Rotate Left Mask/Mask-Insert
+# CHECK-BE: vrlwnm 2, 3, 4 # encoding: [0x10,0x43,0x21,0x85]
+# CHECK-LE: vrlwnm 2, 3, 4 # encoding: [0x85,0x21,0x43,0x10]
+            vrlwnm 2, 3, 4
+# CHECK-BE: vrlwmi 2, 3, 4 # encoding: [0x10,0x43,0x20,0x85]
+# CHECK-LE: vrlwmi 2, 3, 4 # encoding: [0x85,0x20,0x43,0x10]
+            vrlwmi 2, 3, 4
+# CHECK-BE: vrldnm 2, 3, 4 # encoding: [0x10,0x43,0x21,0xc5]
+# CHECK-LE: vrldnm 2, 3, 4 # encoding: [0xc5,0x21,0x43,0x10]
+            vrldnm 2, 3, 4
+# CHECK-BE: vrldmi 2, 3, 4 # encoding: [0x10,0x43,0x20,0xc5]
+# CHECK-LE: vrldmi 2, 3, 4 # encoding: [0xc5,0x20,0x43,0x10]
+            vrldmi 2, 3, 4
+
+# Vector Shift Left/Right
+# CHECK-BE: vslv 2, 3, 4 # encoding: [0x10,0x43,0x27,0x44]
+# CHECK-LE: vslv 2, 3, 4 # encoding: [0x44,0x27,0x43,0x10]
+            vslv 2, 3, 4
+# CHECK-BE: vsrv 2, 3, 4 # encoding: [0x10,0x43,0x27,0x04]
+# CHECK-LE: vsrv 2, 3, 4 # encoding: [0x04,0x27,0x43,0x10]
+            vsrv 2, 3, 4
+
+# Vector Multiply-by-10
+# CHECK-BE: vmul10uq 2, 3 # encoding: [0x10,0x43,0x02,0x01]
+# CHECK-LE: vmul10uq 2, 3 # encoding: [0x01,0x02,0x43,0x10]
+            vmul10uq 2, 3
+# CHECK-BE: vmul10cuq 2, 3 # encoding: [0x10,0x43,0x00,0x01]
+# CHECK-LE: vmul10cuq 2, 3 # encoding: [0x01,0x00,0x43,0x10]
+            vmul10cuq 2, 3
+# CHECK-BE: vmul10euq 2, 3, 4 # encoding: [0x10,0x43,0x22,0x41]
+# CHECK-LE: vmul10euq 2, 3, 4 # encoding: [0x41,0x22,0x43,0x10]
+            vmul10euq 2, 3, 4
+# CHECK-BE: vmul10ecuq 2, 3, 4 # encoding: [0x10,0x43,0x20,0x41]
+# CHECK-LE: vmul10ecuq 2, 3, 4 # encoding: [0x41,0x20,0x43,0x10]
+            vmul10ecuq 2, 3, 4