diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1327,8 +1327,8 @@
                          int_ppc_altivec_crypto_vpmsumw, v4i32>;
 def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
                          int_ppc_altivec_crypto_vpmsumd, v2i64>;
-def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
-                           int_ppc_altivec_crypto_vpermxor, v16i8>;
+def VPERMXOR : VAForm_1<45, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VC),
+                        "vpermxor $VD, $VA, $VB, $VC", IIC_VecFP, []>;
 
 // Vector doubleword integer pack and unpack.
 let hasSideEffects = 1 in {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2408,6 +2408,8 @@
 // arbitrarily chosen to be Big, Little.
 //
 // Predicate combinations available:
+// [HasVSX, IsLittleEndian, HasP8Altivec] Altivec patterns using VSX instr.
+// [HasVSX, IsBigEndian, HasP8Altivec] Altivec patterns using VSX instr.
 // [HasVSX]
 // [HasVSX, IsBigEndian]
 // [HasVSX, IsLittleEndian]
@@ -2436,6 +2438,18 @@
 // [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64]
 // [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
 
+// These Altivec patterns are here because we need a VSX instruction to match
+// the intrinsic (but only on little-endian systems).
+let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in
+  def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
+                                                     v16i8:$b, v16i8:$c)),
+            (v16i8 (VPERMXOR $a, $b, (XXLNOR (COPY_TO_REGCLASS $c, VSRC),
+                                             (COPY_TO_REGCLASS $c, VSRC))))>;
+let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
+  def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
+                                                     v16i8:$b, v16i8:$c)),
+            (v16i8 (VPERMXOR $a, $b, $c))>;
+
 let AddedComplexity = 400 in {
 // Valid for any VSX subtarget, regardless of endianness.
 let Predicates = [HasVSX] in {
diff --git a/llvm/test/CodeGen/PowerPC/crypto_bifs.ll b/llvm/test/CodeGen/PowerPC/crypto_bifs.ll
--- a/llvm/test/CodeGen/PowerPC/crypto_bifs.ll
+++ b/llvm/test/CodeGen/PowerPC/crypto_bifs.ll
@@ -1,7 +1,11 @@
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
 ; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
 ; FIXME: The original intent was to add a check-next for the blr after every check.
 ; However, this currently fails since we don't eliminate stores of the unused
@@ -103,6 +107,7 @@
   %2 = load <16 x i8>, <16 x i8>* %c, align 16
   %3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
   ret <16 x i8> %3
+; CHECK-LE: xxlnor
 ; CHECK: vpermxor 2,
 }
 
@@ -127,6 +132,7 @@
   %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
   %7 = bitcast <16 x i8> %6 to <8 x i16>
   ret <8 x i16> %7
+; CHECK-LE: xxlnor
 ; CHECK: vpermxor 2,
 }
 
@@ -148,6 +154,7 @@
   %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
   %7 = bitcast <16 x i8> %6 to <4 x i32>
   ret <4 x i32> %7
+; CHECK-LE: xxlnor
 ; CHECK: vpermxor 2,
 }
 
@@ -169,6 +176,7 @@
   %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
   %7 = bitcast <16 x i8> %6 to <2 x i64>
   ret <2 x i64> %7
+; CHECK-LE: xxlnor
 ; CHECK: vpermxor 2,
 }
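As a rough illustration (not part of the patch itself), the following minimal IR reproducer exercises the intrinsic that the new patterns match; the function name @vpermxor_reproducer is made up for this sketch:

  define <16 x i8> @vpermxor_reproducer(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
  entry:
    ; %c is the control vector; the little-endian pattern above complements it
    ; with xxlnor before it reaches vpermxor.
    %r = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
    ret <16 x i8> %r
  }

  declare <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8>, <16 x i8>, <16 x i8>)

Running llc on this with -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 should show an xxlnor feeding the vpermxor, while the big-endian triple emits vpermxor alone, matching the CHECK-LE and CHECK lines added above.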