Index: include/llvm/IR/IntrinsicsPowerPC.td =================================================================== --- include/llvm/IR/IntrinsicsPowerPC.td +++ include/llvm/IR/IntrinsicsPowerPC.td @@ -73,7 +73,7 @@ [llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; -/// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16f8 +/// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16i8 /// vectors and returns one. These intrinsics have no side effects. class PowerPC_Vec_BBB_Intrinsic : PowerPC_Vec_Intrinsic; +/// PowerPC_Vec_DDD_Intrinsic - A PowerPC intrinsic that takes two v2i64 +/// vectors and returns one. These intrinsics have no side effects. +class PowerPC_Vec_DDD_Intrinsic + : PowerPC_Vec_Intrinsic; + //===----------------------------------------------------------------------===// // PowerPC VSX Intrinsic Class Definitions. @@ -510,6 +517,41 @@ def int_ppc_altivec_vrefp : PowerPC_Vec_FF_Intrinsic<"vrefp">; def int_ppc_altivec_vrsqrtefp : PowerPC_Vec_FF_Intrinsic<"vrsqrtefp">; +// Power8 Intrinsics +// Crypto +let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". + def int_ppc_altivec_crypto_vsbox : + GCCBuiltin<"__builtin_altivec_crypto_vsbox">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_ppc_altivec_crypto_vpermxor : + GCCBuiltin<"__builtin_altivec_crypto_vpermxor">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + +// These need diagnostics for invalid arguments so don't inherit from GCCBuiltin +def int_ppc_altivec_crypto_vshasigmad : + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_ppc_altivec_crypto_vshasigmaw : + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +} +def int_ppc_altivec_crypto_vcipher : + PowerPC_Vec_DDD_Intrinsic<"crypto_vcipher">; +def int_ppc_altivec_crypto_vcipherlast : + PowerPC_Vec_DDD_Intrinsic<"crypto_vcipherlast">; +def int_ppc_altivec_crypto_vncipher : + PowerPC_Vec_DDD_Intrinsic<"crypto_vncipher">; +def int_ppc_altivec_crypto_vncipherlast : + PowerPC_Vec_DDD_Intrinsic<"crypto_vncipherlast">; +def int_ppc_altivec_crypto_vpmsumb : + PowerPC_Vec_BBB_Intrinsic<"crypto_vpmsumb">; +def int_ppc_altivec_crypto_vpmsumh : + PowerPC_Vec_HHH_Intrinsic<"crypto_vpmsumh">; +def int_ppc_altivec_crypto_vpmsumw : + PowerPC_Vec_WWW_Intrinsic<"crypto_vpmsumw">; +def int_ppc_altivec_crypto_vpmsumd : + PowerPC_Vec_DDD_Intrinsic<"crypto_vpmsumd">; //===----------------------------------------------------------------------===// // PowerPC VSX Intrinsic Definitions. Index: lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp =================================================================== --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -425,6 +425,7 @@ bool isToken() const override { return Kind == Token; } bool isImm() const override { return Kind == Immediate || Kind == Expression; } + bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); } bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); } bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); } bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } Index: lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h =================================================================== --- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -43,6 +43,7 @@ void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier = nullptr); + void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); Index: lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp =================================================================== --- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -214,6 +214,13 @@ printOperand(MI, OpNo+1, O); } +void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned int Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 1 && "Invalid u1imm argument!"); + O << (unsigned int)Value; +} + void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); Index: lib/Target/PowerPC/PPC.td =================================================================== --- lib/Target/PowerPC/PPC.td +++ lib/Target/PowerPC/PPC.td @@ -112,6 +112,9 @@ def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true", "Enable POWER8 Altivec instructions", [FeatureAltivec]>; +def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true", + "Enable POWER8 Crypto instructions", + [FeatureP8Altivec]>; def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true", "Enable POWER8 vector instructions", [FeatureVSX, FeatureP8Altivec]>; @@ -258,7 +261,7 @@ FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, + FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto, Feature64Bit /*, Feature64BitRegs */, FeatureICBT, DeprecatedMFTB, DeprecatedDST]; } Index: lib/Target/PowerPC/PPCInstrAltivec.td =================================================================== --- lib/Target/PowerPC/PPCInstrAltivec.td +++ lib/Target/PowerPC/PPCInstrAltivec.td @@ -269,6 +269,16 @@ !strconcat(opc, " $vD, $vB"), IIC_VecFP, [(set OutTy:$vD, (IntID InTy:$vB))]>; +class VXBX_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> + : VXForm_BX; + +class VXCR_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> + : VXForm_CR; + //===----------------------------------------------------------------------===// // Instruction Definitions. @@ -939,6 +949,7 @@ } // end HasAltivec def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">; +def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">; let Predicates = [HasP8Altivec] in { // Count Leading Zeros @@ -992,4 +1003,33 @@ "vorc $vD, $vA, $vB", IIC_VecGeneral, [(set v4i32:$vD, (or v4i32:$vA, (vnot_ppc v4i32:$vB)))]>; + +// The cryptography instructions that do not require Category:Vector.Crypto +def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb", + int_ppc_altivec_crypto_vpmsumb, v16i8>; +def VPMSUMH : VX1_Int_Ty<1096, "vpmsumh", + int_ppc_altivec_crypto_vpmsumh, v8i16>; +def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw", + int_ppc_altivec_crypto_vpmsumw, v4i32>; +def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd", + int_ppc_altivec_crypto_vpmsumd, v2i64>; +def VPERMXOR : VA1a_Int_Ty<45, "vpermxor", + int_ppc_altivec_crypto_vpermxor, v16i8>; } // end HasP8Altivec + +// Crypto instructions (from builtins) +let Predicates = [HasP8Crypto] in { +def VSHASIGMAW : VXCR_Int_Ty<1666, "vshasigmaw", + int_ppc_altivec_crypto_vshasigmaw, v4i32>; +def VSHASIGMAD : VXCR_Int_Ty<1730, "vshasigmad", + int_ppc_altivec_crypto_vshasigmad, v2i64>; +def VCIPHER : VX1_Int_Ty<1288, "vcipher", int_ppc_altivec_crypto_vcipher, + v2i64>; +def VCIPHERLAST : VX1_Int_Ty<1289, "vcipherlast", + int_ppc_altivec_crypto_vcipherlast, v2i64>; +def VNCIPHER : VX1_Int_Ty<1352, "vncipher", + int_ppc_altivec_crypto_vncipher, v2i64>; +def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast", + int_ppc_altivec_crypto_vncipherlast, v2i64>; +def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>; +} // HasP8Crypto Index: lib/Target/PowerPC/PPCInstrFormats.td =================================================================== --- lib/Target/PowerPC/PPCInstrFormats.td +++ lib/Target/PowerPC/PPCInstrFormats.td @@ -1470,6 +1470,39 @@ let Inst{21-31} = xo; } +/// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX" +class VXForm_CR xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + bits<1> ST; + bits<4> SIX; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16} = ST; + let Inst{17-20} = SIX; + let Inst{21-31} = xo; +} + +/// VXForm_BX - VX crypto instructions with "VRT, VRA, 0 - like vsbox" +class VXForm_BX xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> VD; + bits<5> VA; + + let Pattern = pattern; + + let Inst{6-10} = VD; + let Inst{11-15} = VA; + let Inst{16-20} = 0; + let Inst{21-31} = xo; +} + // E-4 VXR-Form class VXRForm_1 xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -446,6 +446,15 @@ let ParserMatchClass = PPCRegCRRCAsmOperand; } +def PPCU1ImmAsmOperand : AsmOperandClass { + let Name = "U1Imm"; let PredicateMethod = "isU1Imm"; + let RenderMethod = "addImmOperands"; +} +def u1imm : Operand { + let PrintMethod = "printU1ImmOperand"; + let ParserMatchClass = PPCU1ImmAsmOperand; +} + def PPCU2ImmAsmOperand : AsmOperandClass { let Name = "U2Imm"; let PredicateMethod = "isU2Imm"; let RenderMethod = "addImmOperands"; Index: lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- lib/Target/PowerPC/PPCSubtarget.h +++ lib/Target/PowerPC/PPCSubtarget.h @@ -90,6 +90,7 @@ bool HasVSX; bool HasP8Vector; bool HasP8Altivec; + bool HasP8Crypto; bool HasFCPSGN; bool HasFSQRT; bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; @@ -218,6 +219,7 @@ bool hasVSX() const { return HasVSX; } bool hasP8Vector() const { return HasP8Vector; } bool hasP8Altivec() const { return HasP8Altivec; } + bool hasP8Crypto() const { return HasP8Crypto; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } Index: lib/Target/PowerPC/PPCSubtarget.cpp =================================================================== --- lib/Target/PowerPC/PPCSubtarget.cpp +++ lib/Target/PowerPC/PPCSubtarget.cpp @@ -69,6 +69,7 @@ HasVSX = false; HasP8Vector = false; HasP8Altivec = false; + HasP8Crypto = false; HasFCPSGN = false; HasFSQRT = false; HasFRE = false; Index: lib/Target/PowerPC/README_ALTIVEC.txt =================================================================== --- lib/Target/PowerPC/README_ALTIVEC.txt +++ lib/Target/PowerPC/README_ALTIVEC.txt @@ -209,3 +209,48 @@ return b; } +//===----------------------------------------------------------------------===// + +We should do a little better with eliminating dead stores. +The stores to the stack are dead since %a and %b are not needed + +; Function Attrs: nounwind +define <16 x i8> @test_vpmsumb() #0 { + entry: + %a = alloca <16 x i8>, align 16 + %b = alloca <16 x i8>, align 16 + store <16 x i8> , <16 x i8>* %a, align 16 + store <16 x i8> , <16 x i8>* %b, align 16 + %0 = load <16 x i8>* %a, align 16 + %1 = load <16 x i8>* %b, align 16 + %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1) + ret <16 x i8> %2 +} + + +; Function Attrs: nounwind readnone +declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1 + + +Produces the following code with -mtriple=powerpc64-unknown-linux-gnu: +# BB#0: # %entry + addis 3, 2, .LCPI0_0@toc@ha + addis 4, 2, .LCPI0_1@toc@ha + addi 3, 3, .LCPI0_0@toc@l + addi 4, 4, .LCPI0_1@toc@l + lxvw4x 0, 0, 3 + addi 3, 1, -16 + lxvw4x 35, 0, 4 + stxvw4x 0, 0, 3 + ori 2, 2, 0 + lxvw4x 34, 0, 3 + addi 3, 1, -32 + stxvw4x 35, 0, 3 + vpmsumb 2, 2, 3 + blr + .long 0 + .quad 0 + +The two stxvw4x instructions are not needed. +With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes +are present too. Index: test/CodeGen/PowerPC/crypto_bifs.ll =================================================================== --- test/CodeGen/PowerPC/crypto_bifs.ll +++ test/CodeGen/PowerPC/crypto_bifs.ll @@ -0,0 +1,275 @@ +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s +; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s +; FIXME: The original intent was to add a check-next for the blr after every check. +; However, this currently fails since we don't eliminate stores of the unused +; locals. These stores are sometimes scheduled after the crypto instruction + +; Function Attrs: nounwind +define <16 x i8> @test_vpmsumb() #0 { +entry: + %a = alloca <16 x i8>, align 16 + %b = alloca <16 x i8>, align 16 + store <16 x i8> , <16 x i8>* %a, align 16 + store <16 x i8> , <16 x i8>* %b, align 16 + %0 = load <16 x i8>, <16 x i8>* %a, align 16 + %1 = load <16 x i8>, <16 x i8>* %b, align 16 + %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1) + ret <16 x i8> %2 +; CHECK: vpmsumb 2, +} + +; Function Attrs: nounwind readnone +declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1 + +; Function Attrs: nounwind +define <8 x i16> @test_vpmsumh() #0 { +entry: + %a = alloca <8 x i16>, align 16 + %b = alloca <8 x i16>, align 16 + store <8 x i16> , <8 x i16>* %a, align 16 + store <8 x i16> , <8 x i16>* %b, align 16 + %0 = load <8 x i16>, <8 x i16>* %a, align 16 + %1 = load <8 x i16>, <8 x i16>* %b, align 16 + %2 = call <8 x i16> @llvm.ppc.altivec.crypto.vpmsumh(<8 x i16> %0, <8 x i16> %1) + ret <8 x i16> %2 +; CHECK: vpmsumh 2, +} + +; Function Attrs: nounwind readnone +declare <8 x i16> @llvm.ppc.altivec.crypto.vpmsumh(<8 x i16>, <8 x i16>) #1 + +; Function Attrs: nounwind +define <4 x i32> @test_vpmsumw() #0 { +entry: + %a = alloca <4 x i32>, align 16 + %b = alloca <4 x i32>, align 16 + store <4 x i32> , <4 x i32>* %a, align 16 + store <4 x i32> , <4 x i32>* %b, align 16 + %0 = load <4 x i32>, <4 x i32>* %a, align 16 + %1 = load <4 x i32>, <4 x i32>* %b, align 16 + %2 = call <4 x i32> @llvm.ppc.altivec.crypto.vpmsumw(<4 x i32> %0, <4 x i32> %1) + ret <4 x i32> %2 +; CHECK: vpmsumw 2, +} + +; Function Attrs: nounwind readnone +declare <4 x i32> @llvm.ppc.altivec.crypto.vpmsumw(<4 x i32>, <4 x i32>) #1 + +; Function Attrs: nounwind +define <2 x i64> @test_vpmsumd() #0 { +entry: + %a = alloca <2 x i64>, align 16 + %b = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + store <2 x i64> , <2 x i64>* %b, align 16 + %0 = load <2 x i64>, <2 x i64>* %a, align 16 + %1 = load <2 x i64>, <2 x i64>* %b, align 16 + %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vpmsumd(<2 x i64> %0, <2 x i64> %1) + ret <2 x i64> %2 +; CHECK: vpmsumd 2, +} + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.crypto.vpmsumd(<2 x i64>, <2 x i64>) #1 + +; Function Attrs: nounwind +define <2 x i64> @test_vsbox() #0 { +entry: + %a = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + %0 = load <2 x i64>, <2 x i64>* %a, align 16 + %1 = call <2 x i64> @llvm.ppc.altivec.crypto.vsbox(<2 x i64> %0) + ret <2 x i64> %1 +; CHECK: vsbox 2, +} + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.crypto.vsbox(<2 x i64>) #1 + +; Function Attrs: nounwind +define <16 x i8> @test_vpermxorb() #0 { +entry: + %a = alloca <16 x i8>, align 16 + %b = alloca <16 x i8>, align 16 + %c = alloca <16 x i8>, align 16 + store <16 x i8> , <16 x i8>* %a, align 16 + store <16 x i8> , <16 x i8>* %b, align 16 + store <16 x i8> , <16 x i8>* %c, align 16 + %0 = load <16 x i8>, <16 x i8>* %a, align 16 + %1 = load <16 x i8>, <16 x i8>* %b, align 16 + %2 = load <16 x i8>, <16 x i8>* %c, align 16 + %3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) + ret <16 x i8> %3 +; CHECK: vpermxor 2, +} + +; Function Attrs: nounwind readnone +declare <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8>, <16 x i8>, <16 x i8>) #1 + +; Function Attrs: nounwind +define <8 x i16> @test_vpermxorh() #0 { +entry: + %a = alloca <8 x i16>, align 16 + %b = alloca <8 x i16>, align 16 + %c = alloca <8 x i16>, align 16 + store <8 x i16> , <8 x i16>* %a, align 16 + store <8 x i16> , <8 x i16>* %b, align 16 + store <8 x i16> , <8 x i16>* %c, align 16 + %0 = load <8 x i16>, <8 x i16>* %a, align 16 + %1 = bitcast <8 x i16> %0 to <16 x i8> + %2 = load <8 x i16>, <8 x i16>* %b, align 16 + %3 = bitcast <8 x i16> %2 to <16 x i8> + %4 = load <8 x i16>, <8 x i16>* %c, align 16 + %5 = bitcast <8 x i16> %4 to <16 x i8> + %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <8 x i16> + ret <8 x i16> %7 +; CHECK: vpermxor 2, +} + +; Function Attrs: nounwind +define <4 x i32> @test_vpermxorw() #0 { +entry: + %a = alloca <4 x i32>, align 16 + %b = alloca <4 x i32>, align 16 + %c = alloca <4 x i32>, align 16 + store <4 x i32> , <4 x i32>* %a, align 16 + store <4 x i32> , <4 x i32>* %b, align 16 + store <4 x i32> , <4 x i32>* %c, align 16 + %0 = load <4 x i32>, <4 x i32>* %a, align 16 + %1 = bitcast <4 x i32> %0 to <16 x i8> + %2 = load <4 x i32>, <4 x i32>* %b, align 16 + %3 = bitcast <4 x i32> %2 to <16 x i8> + %4 = load <4 x i32>, <4 x i32>* %c, align 16 + %5 = bitcast <4 x i32> %4 to <16 x i8> + %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <4 x i32> + ret <4 x i32> %7 +; CHECK: vpermxor 2, +} + +; Function Attrs: nounwind +define <2 x i64> @test_vpermxord() #0 { +entry: + %a = alloca <2 x i64>, align 16 + %b = alloca <2 x i64>, align 16 + %c = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + store <2 x i64> , <2 x i64>* %b, align 16 + store <2 x i64> , <2 x i64>* %c, align 16 + %0 = load <2 x i64>, <2 x i64>* %a, align 16 + %1 = bitcast <2 x i64> %0 to <16 x i8> + %2 = load <2 x i64>, <2 x i64>* %b, align 16 + %3 = bitcast <2 x i64> %2 to <16 x i8> + %4 = load <2 x i64>, <2 x i64>* %c, align 16 + %5 = bitcast <2 x i64> %4 to <16 x i8> + %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5) + %7 = bitcast <16 x i8> %6 to <2 x i64> + ret <2 x i64> %7 +; CHECK: vpermxor 2, +} + +; Function Attrs: nounwind +define <2 x i64> @test_vcipher() #0 { +entry: + %a = alloca <2 x i64>, align 16 + %b = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + store <2 x i64> , <2 x i64>* %b, align 16 + %0 = load <2 x i64>, <2 x i64>* %a, align 16 + %1 = load <2 x i64>, <2 x i64>* %b, align 16 + %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vcipher(<2 x i64> %0, <2 x i64> %1) + ret <2 x i64> %2 +; CHECK: vcipher 2, +} + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.crypto.vcipher(<2 x i64>, <2 x i64>) #1 + +; Function Attrs: nounwind +define <2 x i64> @test_vcipherlast() #0 { +entry: + %a = alloca <2 x i64>, align 16 + %b = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + store <2 x i64> , <2 x i64>* %b, align 16 + %0 = load <2 x i64>, <2 x i64>* %a, align 16 + %1 = load <2 x i64>, <2 x i64>* %b, align 16 + %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vcipherlast(<2 x i64> %0, <2 x i64> %1) + ret <2 x i64> %2 +; CHECK: vcipherlast 2, +} + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.crypto.vcipherlast(<2 x i64>, <2 x i64>) #1 + +; Function Attrs: nounwind +define <2 x i64> @test_vncipher() #0 { +entry: + %a = alloca <2 x i64>, align 16 + %b = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + store <2 x i64> , <2 x i64>* %b, align 16 + %0 = load <2 x i64>, <2 x i64>* %a, align 16 + %1 = load <2 x i64>, <2 x i64>* %b, align 16 + %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vncipher(<2 x i64> %0, <2 x i64> %1) + ret <2 x i64> %2 +; CHECK: vncipher 2, +} + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.crypto.vncipher(<2 x i64>, <2 x i64>) #1 + +; Function Attrs: nounwind +define <2 x i64> @test_vncipherlast() #0 { +entry: + %a = alloca <2 x i64>, align 16 + %b = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + store <2 x i64> , <2 x i64>* %b, align 16 + %0 = load <2 x i64>, <2 x i64>* %a, align 16 + %1 = load <2 x i64>, <2 x i64>* %b, align 16 + %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vncipherlast(<2 x i64> %0, <2 x i64> %1) + ret <2 x i64> %2 +; CHECK: vncipherlast 2, +} + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.crypto.vncipherlast(<2 x i64>, <2 x i64>) #1 + +; Function Attrs: nounwind +define <4 x i32> @test_vshasigmaw() #0 { +entry: + %a = alloca <4 x i32>, align 16 + store <4 x i32> , <4 x i32>* %a, align 16 + %0 = load <4 x i32>, <4 x i32>* %a, align 16 + %1 = call <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32> %0, i32 1, i32 15) + ret <4 x i32> %1 +; CHECK: vshasigmaw 2, +} + +; Function Attrs: nounwind readnone +declare <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32>, i32, i32) #1 + +; Function Attrs: nounwind +define <2 x i64> @test_vshasigmad() #0 { +entry: + %a = alloca <2 x i64>, align 16 + store <2 x i64> , <2 x i64>* %a, align 16 + %0 = load <2 x i64>, <2 x i64>* %a, align 16 + %1 = call <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64> %0, i32 1, i32 15) + ret <2 x i64> %1 +; CHECK: vshasigmad 2, +} + +; Function Attrs: nounwind readnone +declare <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64>, i32, i32) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.7.0 (trunk 230949) (llvm/trunk 230946)"} Index: test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt =================================================================== --- test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt +++ test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt @@ -120,6 +120,42 @@ # CHECK: vperm 2, 3, 4, 5 0x10 0x43 0x21 0x6b +# CHECK: vpermxor 2, 3, 4, 5 +0x10 0x43 0x21 0x6d + +# CHECK: vsbox 2, 5 +0x10 0x45 0x05 0xc8 + +# CHECK: vcipher 2, 5, 17 +0x10 0x45 0x8d 0x08 + +# CHECK: vcipherlast 2, 5, 17 +0x10 0x45 0x8d 0x09 + +# CHECK: vncipher 2, 5, 17 +0x10,0x45,0x8d,0x48 + +# CHECK: vncipherlast 2, 5, 17 +0x10,0x45,0x8d,0x49 + +# CHECK: vpmsumb 2, 5, 17 +0x10 0x45 0x8c 0x08 + +# CHECK: vpmsumh 2, 5, 17 +0x10 0x45 0x8c 0x48 + +# CHECK: vpmsumw 2, 5, 17 +0x10 0x45 0x8c 0x88 + +# CHECK: vpmsumd 2, 5, 17 +0x10 0x45 0x8c 0xc8 + +# CHECK: vshasigmaw 2, 3, 0, 11 +0x10 0x43 0x5e 0x82 + +# CHECK: vshasigmad 2, 3, 1, 15 +0x10 0x43 0xfe 0xc2 + # CHECK: vsel 2, 3, 4, 5 0x10 0x43 0x21 0x6a Index: test/MC/PowerPC/ppc64-encoding-vmx.s =================================================================== --- test/MC/PowerPC/ppc64-encoding-vmx.s +++ test/MC/PowerPC/ppc64-encoding-vmx.s @@ -133,6 +133,55 @@ # CHECK-BE: vperm 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x6b] # CHECK-LE: vperm 2, 3, 4, 5 # encoding: [0x6b,0x21,0x43,0x10] vperm 2, 3, 4, 5 + +# CHECK-BE: vpermxor 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x6d] +# CHECK-LE: vpermxor 2, 3, 4, 5 # encoding: [0x6d,0x21,0x43,0x10] + vpermxor 2, 3, 4, 5 + +# CHECK-BE: vsbox 2, 5 # encoding: [0x10,0x45,0x05,0xc8] +# CHECK-LE: vsbox 2, 5 # encoding: [0xc8,0x05,0x45,0x10] + vsbox 2, 5 + +# CHECK-BE: vcipher 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x08] +# CHECK-LE: vcipher 2, 5, 17 # encoding: [0x08,0x8d,0x45,0x10] + vcipher 2, 5, 17 + +# CHECK-BE: vcipherlast 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x09] +# CHECK-LE: vcipherlast 2, 5, 17 # encoding: [0x09,0x8d,0x45,0x10] + vcipherlast 2, 5, 17 + +# CHECK-BE: vncipher 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x48] +# CHECK-LE: vncipher 2, 5, 17 # encoding: [0x48,0x8d,0x45,0x10] + vncipher 2, 5, 17 + +# CHECK-BE: vncipherlast 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x49] +# CHECK-LE: vncipherlast 2, 5, 17 # encoding: [0x49,0x8d,0x45,0x10] + vncipherlast 2, 5, 17 + +# CHECK-BE: vpmsumb 2, 5, 17 # encoding: [0x10,0x45,0x8c,0x08] +# CHECK-LE: vpmsumb 2, 5, 17 # encoding: [0x08,0x8c,0x45,0x10] + vpmsumb 2, 5, 17 + +# CHECK-BE: vpmsumh 2, 5, 17 # encoding: [0x10,0x45,0x8c,0x48] +# CHECK-LE: vpmsumh 2, 5, 17 # encoding: [0x48,0x8c,0x45,0x10] + vpmsumh 2, 5, 17 + +# CHECK-BE: vpmsumw 2, 5, 17 # encoding: [0x10,0x45,0x8c,0x88] +# CHECK-LE: vpmsumw 2, 5, 17 # encoding: [0x88,0x8c,0x45,0x10] + vpmsumw 2, 5, 17 + +# CHECK-BE: vpmsumd 2, 5, 17 # encoding: [0x10,0x45,0x8c,0xc8] +# CHECK-LE: vpmsumd 2, 5, 17 # encoding: [0xc8,0x8c,0x45,0x10] + vpmsumd 2, 5, 17 + +# CHECK-BE: vshasigmaw 2, 3, 0, 11 # encoding: [0x10,0x43,0x5e,0x82] +# CHECK-LE: vshasigmaw 2, 3, 0, 11 # encoding: [0x82,0x5e,0x43,0x10] + vshasigmaw 2, 3, 0, 11 + +# CHECK-BE: vshasigmad 2, 3, 1, 15 # encoding: [0x10,0x43,0xfe,0xc2] +# CHECK-LE: vshasigmad 2, 3, 1, 15 # encoding: [0xc2,0xfe,0x43,0x10] + vshasigmad 2, 3, 1, 15 + # CHECK-BE: vsel 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x6a] # CHECK-LE: vsel 2, 3, 4, 5 # encoding: [0x6a,0x21,0x43,0x10] vsel 2, 3, 4, 5