# Patch summary (reviewer preamble; everything from the first "diff --git" on is
# the unmodified patch text).
#
# Purpose: introduce a second vpermxor crypto builtin/intrinsic with a "_be"
# suffix and wire it through Clang and the PowerPC backend.
#
# Files touched, as visible in the hunks below:
#   * clang/include/clang/Basic/BuiltinsPPC.def
#       adds BUILTIN(__builtin_altivec_crypto_vpermxor_be,
#       "V16UcV16UcV16UcV16Uc", "") next to the existing vpermxor builtin.
#   * clang/test/CodeGen/builtins-ppc-crypto.c
#       adds seven test functions (test_vpermxor{b,h,w,d,bc,sc,uc}_be); each
#       calls the new builtin and checks for a call to
#       @llvm.ppc.altivec.crypto.vpermxor.be.
#   * llvm/include/llvm/IR/IntrinsicsPowerPC.td
#       adds int_ppc_altivec_crypto_vpermxor_be: GCCBuiltin-mapped, one v16i8
#       result, three v16i8 operands, [IntrNoMem].
#   * llvm/lib/Target/PowerPC/PPCInstrVSX.td
#       adds "[HasVSX, HasP8Altivec]" to the predicate-list comment and a Pat
#       under those predicates that selects (VPERMXOR $a, $b, $c) for the new
#       intrinsic.
#   * llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll (new file, 165 lines per the
#       hunk header)
#       four functions run through llc with three RUN lines
#       (powerpc64le/pwr8, powerpc64le/pwr9, powerpc64/pwr8); every prefix
#       expects "vpermxor 2, 3, 2, 2".
#
# NOTE(review): the line structure of this patch appears to have been collapsed
#   (each hunk header declares many lines, but the text sits on a handful of
#   very long lines). It presumably has to be re-expanded to one diff line per
#   text line before any patch tool will accept it -- confirm against the
#   original change.
# NOTE(review): in crypto_bifs_be.ll the calls read
#   "vpermxor.be(<16 x i8> , <16 x i8> , <16 x i8> )" with no values after the
#   operand types. It looks like the vector constants were stripped during
#   extraction; restore them from the original change before using the test.
#   The CHECK lines load two constant-pool entries and use one register for
#   the last two vpermxor source operands, which suggests the 2nd and 3rd
#   call operands were the same constant -- TODO confirm.
# NOTE(review): the context lines in the PPCInstrVSX.td hunk show the existing
#   int_ppc_altivec_crypto_vpermxor pattern also expanding to
#   (VPERMXOR $a, $b, $c), i.e. identical to the new _be pattern as far as this
#   patch shows. Confirm whether the two intrinsics are meant to select
#   differently (e.g. on little-endian targets) and whether a follow-up change
#   is expected.
# NOTE(review): test_vpermxorb_be and test_vpermxoruc_be in
#   builtins-ppc-crypto.c have the same signature (vector unsigned char) and
#   body, so they exercise the same case.
# NOTE(review): the CHECK-P9 prefix is only used with the powerpc64le triple;
#   no big-endian pwr9 run is present in this patch.
# The .ll file header states its assertions were autogenerated by
#   utils/update_llc_test_checks.py.
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -404,6 +404,7 @@ // P8 Crypto built-ins. BUILTIN(__builtin_altivec_crypto_vsbox, "V2ULLiV2ULLi", "") BUILTIN(__builtin_altivec_crypto_vpermxor, "V16UcV16UcV16UcV16Uc", "") +BUILTIN(__builtin_altivec_crypto_vpermxor_be, "V16UcV16UcV16UcV16Uc", "") BUILTIN(__builtin_altivec_crypto_vshasigmaw, "V4UiV4UiIiIi", "") BUILTIN(__builtin_altivec_crypto_vshasigmad, "V2ULLiV2ULLiIiIi", "") BUILTIN(__builtin_altivec_crypto_vcipher, "V2ULLiV2ULLiV2ULLi", "") diff --git a/clang/test/CodeGen/builtins-ppc-crypto.c b/clang/test/CodeGen/builtins-ppc-crypto.c --- a/clang/test/CodeGen/builtins-ppc-crypto.c +++ b/clang/test/CodeGen/builtins-ppc-crypto.c @@ -132,6 +132,62 @@ // CHECK: @llvm.ppc.altivec.crypto.vpermxor } +// CHECK-LABEL: define{{.*}} <16 x i8> @test_vpermxorb_be +vector unsigned char test_vpermxorb_be(vector unsigned char a, + vector unsigned char b, + vector unsigned char c) { + return __builtin_altivec_crypto_vpermxor_be(a, b, c); +// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be +} + +// CHECK-LABEL: define{{.*}} <8 x i16> @test_vpermxorh_be +vector unsigned short test_vpermxorh_be(vector unsigned short a, + vector unsigned short b, + vector unsigned short c) { + return __builtin_altivec_crypto_vpermxor_be(a, b, c); +// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be +} + +// CHECK-LABEL: define{{.*}} <4 x i32> @test_vpermxorw_be +vector unsigned int test_vpermxorw_be(vector unsigned int a, + vector unsigned int b, + vector unsigned int c) { + return __builtin_altivec_crypto_vpermxor_be(a, b, c); +// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be +} + +// CHECK-LABEL: define{{.*}} <2 x i64> @test_vpermxord_be +vector unsigned long long test_vpermxord_be(vector unsigned long long a, + vector unsigned long long b, + vector unsigned long long c) { + return 
__builtin_altivec_crypto_vpermxor_be(a, b, c); +// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be +} + +// CHECK-LABEL: define{{.*}} <16 x i8> @test_vpermxorbc_be +vector bool char test_vpermxorbc_be(vector bool char a, + vector bool char b, + vector bool char c) { + return __builtin_altivec_crypto_vpermxor_be(a, b, c); +// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be +} + +// CHECK-LABEL: define{{.*}} <16 x i8> @test_vpermxorsc_be +vector signed char test_vpermxorsc_be(vector signed char a, + vector signed char b, + vector signed char c) { + return __builtin_altivec_crypto_vpermxor_be(a, b, c); +// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be +} + +// CHECK-LABEL: define{{.*}} <16 x i8> @test_vpermxoruc_be +vector unsigned char test_vpermxoruc_be(vector unsigned char a, + vector unsigned char b, + vector unsigned char c) { + return __builtin_altivec_crypto_vpermxor_be(a, b, c); +// CHECK: @llvm.ppc.altivec.crypto.vpermxor.be +} + // CHECK-LABEL: define{{.*}} <2 x i64> @test_vcipher vector unsigned long long test_vcipher(void) { diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1087,6 +1087,10 @@ GCCBuiltin<"__builtin_altivec_crypto_vpermxor">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_crypto_vpermxor_be : + GCCBuiltin<"__builtin_altivec_crypto_vpermxor_be">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_ppc_altivec_crypto_vshasigmad : GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">, diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2471,6 +2471,7 @@ // [HasVSX, HasP8Vector, IsLittleEndian] // [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64] // [HasVSX, 
HasP8Vector, NoP9Vector, IsLittleEndian] +// [HasVSX, HasP8Altivec] // [HasVSX, HasDirectMove] // [HasVSX, HasDirectMove, IsBigEndian] // [HasVSX, HasDirectMove, IsLittleEndian] @@ -2500,6 +2501,10 @@ def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a, v16i8:$b, v16i8:$c)), (v16i8 (VPERMXOR $a, $b, $c))>; +let Predicates = [HasVSX, HasP8Altivec] in + def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor_be v16i8:$a, + v16i8:$b, v16i8:$c)), + (v16i8 (VPERMXOR $a, $b, $c))>; let AddedComplexity = 400 in { // Valid for any VSX subtarget, regardless of endianness. diff --git a/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll b/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK-LE-P8 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK-P9 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK-BE-P8 + +define <16 x i8> @test_vpermxorb() local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_vpermxorb: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI0_1@toc@ha +; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-LE-P8-NEXT: lvx 2, 0, 3 +; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI0_1@toc@l +; CHECK-LE-P8-NEXT: lvx 3, 0, 3 +; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-P9-LABEL: test_vpermxorb: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-P9-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-P9-NEXT: lxv 34, 0(3) +; CHECK-P9-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; CHECK-P9-NEXT: addi 3, 3, 
.LCPI0_1@toc@l +; CHECK-P9-NEXT: lxv 35, 0(3) +; CHECK-P9-NEXT: vpermxor 2, 3, 2, 2 +; CHECK-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_vpermxorb: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-BE-P8-NEXT: addis 4, 2, .LCPI0_1@toc@ha +; CHECK-BE-P8-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-BE-P8-NEXT: addi 4, 4, .LCPI0_1@toc@l +; CHECK-BE-P8-NEXT: lxvw4x 34, 0, 3 +; CHECK-BE-P8-NEXT: lxvw4x 35, 0, 4 +; CHECK-BE-P8-NEXT: vpermxor 2, 3, 2, 2 +; CHECK-BE-P8-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> , <16 x i8> , <16 x i8> ) + ret <16 x i8> %0 +} + +declare <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8>, <16 x i8>, <16 x i8>) + +define <8 x i16> @test_vpermxorh() local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_vpermxorh: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI1_0@toc@ha +; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI1_1@toc@ha +; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI1_0@toc@l +; CHECK-LE-P8-NEXT: lvx 2, 0, 3 +; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI1_1@toc@l +; CHECK-LE-P8-NEXT: lvx 3, 0, 3 +; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-P9-LABEL: test_vpermxorh: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: addis 3, 2, .LCPI1_0@toc@ha +; CHECK-P9-NEXT: addi 3, 3, .LCPI1_0@toc@l +; CHECK-P9-NEXT: lxv 34, 0(3) +; CHECK-P9-NEXT: addis 3, 2, .LCPI1_1@toc@ha +; CHECK-P9-NEXT: addi 3, 3, .LCPI1_1@toc@l +; CHECK-P9-NEXT: lxv 35, 0(3) +; CHECK-P9-NEXT: vpermxor 2, 3, 2, 2 +; CHECK-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_vpermxorh: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis 3, 2, .LCPI1_0@toc@ha +; CHECK-BE-P8-NEXT: addis 4, 2, .LCPI1_1@toc@ha +; CHECK-BE-P8-NEXT: addi 3, 3, .LCPI1_0@toc@l +; CHECK-BE-P8-NEXT: addi 4, 4, .LCPI1_1@toc@l +; CHECK-BE-P8-NEXT: lxvw4x 34, 0, 3 +; CHECK-BE-P8-NEXT: lxvw4x 35, 0, 4 +; CHECK-BE-P8-NEXT: vpermxor 2, 3, 2, 2 +; CHECK-BE-P8-NEXT: blr +entry: + %0 = tail call <16 x 
i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> , <16 x i8> , <16 x i8> ) + %1 = bitcast <16 x i8> %0 to <8 x i16> + ret <8 x i16> %1 +} + +define <4 x i32> @test_vpermxorw() local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_vpermxorw: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI2_0@toc@ha +; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI2_1@toc@ha +; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI2_0@toc@l +; CHECK-LE-P8-NEXT: lvx 2, 0, 3 +; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI2_1@toc@l +; CHECK-LE-P8-NEXT: lvx 3, 0, 3 +; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-P9-LABEL: test_vpermxorw: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: addis 3, 2, .LCPI2_0@toc@ha +; CHECK-P9-NEXT: addi 3, 3, .LCPI2_0@toc@l +; CHECK-P9-NEXT: lxv 34, 0(3) +; CHECK-P9-NEXT: addis 3, 2, .LCPI2_1@toc@ha +; CHECK-P9-NEXT: addi 3, 3, .LCPI2_1@toc@l +; CHECK-P9-NEXT: lxv 35, 0(3) +; CHECK-P9-NEXT: vpermxor 2, 3, 2, 2 +; CHECK-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_vpermxorw: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis 3, 2, .LCPI2_0@toc@ha +; CHECK-BE-P8-NEXT: addis 4, 2, .LCPI2_1@toc@ha +; CHECK-BE-P8-NEXT: addi 3, 3, .LCPI2_0@toc@l +; CHECK-BE-P8-NEXT: addi 4, 4, .LCPI2_1@toc@l +; CHECK-BE-P8-NEXT: lxvw4x 34, 0, 3 +; CHECK-BE-P8-NEXT: lxvw4x 35, 0, 4 +; CHECK-BE-P8-NEXT: vpermxor 2, 3, 2, 2 +; CHECK-BE-P8-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> , <16 x i8> , <16 x i8> ) + %1 = bitcast <16 x i8> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <2 x i64> @test_vpermxord() local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_vpermxord: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis 3, 2, .LCPI3_0@toc@ha +; CHECK-LE-P8-NEXT: addis 4, 2, .LCPI3_1@toc@ha +; CHECK-LE-P8-NEXT: addi 3, 3, .LCPI3_0@toc@l +; CHECK-LE-P8-NEXT: lvx 2, 0, 3 +; CHECK-LE-P8-NEXT: addi 3, 4, .LCPI3_1@toc@l +; CHECK-LE-P8-NEXT: lvx 3, 0, 3 +; CHECK-LE-P8-NEXT: vpermxor 2, 3, 2, 2 +; CHECK-LE-P8-NEXT: blr 
+; +; CHECK-P9-LABEL: test_vpermxord: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: addis 3, 2, .LCPI3_0@toc@ha +; CHECK-P9-NEXT: addi 3, 3, .LCPI3_0@toc@l +; CHECK-P9-NEXT: lxv 34, 0(3) +; CHECK-P9-NEXT: addis 3, 2, .LCPI3_1@toc@ha +; CHECK-P9-NEXT: addi 3, 3, .LCPI3_1@toc@l +; CHECK-P9-NEXT: lxv 35, 0(3) +; CHECK-P9-NEXT: vpermxor 2, 3, 2, 2 +; CHECK-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_vpermxord: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis 3, 2, .LCPI3_0@toc@ha +; CHECK-BE-P8-NEXT: addis 4, 2, .LCPI3_1@toc@ha +; CHECK-BE-P8-NEXT: addi 3, 3, .LCPI3_0@toc@l +; CHECK-BE-P8-NEXT: addi 4, 4, .LCPI3_1@toc@l +; CHECK-BE-P8-NEXT: lxvw4x 34, 0, 3 +; CHECK-BE-P8-NEXT: lxvw4x 35, 0, 4 +; CHECK-BE-P8-NEXT: vpermxor 2, 3, 2, 2 +; CHECK-BE-P8-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor.be(<16 x i8> , <16 x i8> , <16 x i8> ) + %1 = bitcast <16 x i8> %0 to <2 x i64> + ret <2 x i64> %1 +} +