[PowerPC] Add the vbpermd builtin/intrinsic and new vec_bperm overloads.

Summary (derived from the hunks below):
  * BuiltinsPPC.def, IntrinsicsPowerPC.td: declare __builtin_altivec_vbpermd /
    int_ppc_altivec_vbpermd as (<2 x i64>, <16 x i8>) -> <2 x i64>.
  * PPCInstrAltivec.td: define VBPERMD through VX1_Int_Ty3 so the intrinsic
    selects the instruction.
  * altivec.h: make the existing vec_bperm overloadable (__ATTRS_o_ai), add a
    (vector unsigned char, vector unsigned char) overload on vbpermq, and add a
    (vector unsigned long long, vector unsigned char) overload on vbpermd
    under __POWER9_VECTOR__.
  * Tests: clang CodeGen checks for both new overloads and an llc test that
    checks vbpermd is emitted for -mcpu=pwr9.

Review notes:
  * The vector unsigned char overload casts the vbpermq result explicitly.
    The builtin is declared as returning V2ULLi, so returning it from a
    function typed vector unsigned char would otherwise depend on an implicit
    lax vector conversion. The neighbouring unsigned __int128 overload already
    casts its operands explicitly.
  * NOTE(review): builtins-ppc-p8vector.c still stores the result of
    vec_bperm(vuc, vuc) into res_vull; if the test declares a
    vector unsigned char result variable, prefer that - confirm.
  * NOTE(review): blank context lines and indentation were reconstructed from
    the hunk line counts; re-check against the target tree before applying.

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -397,6 +397,7 @@
 
 BUILTIN(__builtin_altivec_vgbbd, "V16UcV16Uc", "")
 BUILTIN(__builtin_altivec_vbpermq, "V2ULLiV16UcV16Uc", "")
+BUILTIN(__builtin_altivec_vbpermd, "V2ULLiV2ULLiV16Uc", "")
 
 // P8 Crypto built-ins.
 BUILTIN(__builtin_altivec_crypto_vsbox, "V2ULLiV2ULLi", "")
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -17369,12 +17369,22 @@
 }
 
 #if defined(__powerpc64__) && defined(__SIZEOF_INT128__)
-static __inline__ vector unsigned long long __attribute__((__always_inline__))
+static __inline__ vector unsigned long long __ATTRS_o_ai
 vec_bperm(vector unsigned __int128 __a, vector unsigned char __b) {
   return __builtin_altivec_vbpermq((vector unsigned char)__a,
                                    (vector unsigned char)__b);
 }
 #endif
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_bperm(vector unsigned char __a, vector unsigned char __b) {
+  return (vector unsigned char)__builtin_altivec_vbpermq(__a, __b);
+}
+#endif // __POWER8_VECTOR__
+#ifdef __POWER9_VECTOR__
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_bperm(vector unsigned long long __a, vector unsigned char __b) {
+  return __builtin_altivec_vbpermd(__a, __b);
+}
 #endif
 
 
diff --git a/clang/test/CodeGen/builtins-ppc-p8vector.c b/clang/test/CodeGen/builtins-ppc-p8vector.c
--- a/clang/test/CodeGen/builtins-ppc-p8vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p8vector.c
@@ -1177,10 +1177,13 @@
 // CHECK: llvm.ppc.altivec.vgbbd
 // CHECK-LE: llvm.ppc.altivec.vgbbd
 
-  res_vull = vec_bperm(vux, vux);
-// CHECK: llvm.ppc.altivec.vbpermq
-// CHECK-LE: llvm.ppc.altivec.vbpermq
-// CHECK-PPC: warning: implicit declaration of function 'vec_bperm'
+  res_vull = vec_bperm(vux, vuc);
+  // CHECK: llvm.ppc.altivec.vbpermq
+  // CHECK-LE: llvm.ppc.altivec.vbpermq
+
+  res_vull = vec_bperm(vuc, vuc);
+  // CHECK: llvm.ppc.altivec.vbpermq
+  // CHECK-LE: llvm.ppc.altivec.vbpermq
 
   res_vsll = vec_neg(vsll);
 // CHECK: sub <2 x i64> zeroinitializer, {{%[0-9]+}}
diff --git a/clang/test/CodeGen/builtins-ppc-p9vector.c b/clang/test/CodeGen/builtins-ppc-p9vector.c
--- a/clang/test/CodeGen/builtins-ppc-p9vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p9vector.c
@@ -1260,3 +1260,9 @@
   // CHECK-NEXT: ret <2 x i64>
   return vec_signextll(vsia);
 }
+
+vector unsigned long long test_vbpermd(void) {
+  // CHECK: @llvm.ppc.altivec.vbpermd(<2 x i64>
+  // CHECK-BE: @llvm.ppc.altivec.vbpermd(<2 x i64>
+  return vec_bperm(vula, vuca);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1042,6 +1042,9 @@
   def int_ppc_altivec_vbpermq : GCCBuiltin<"__builtin_altivec_vbpermq">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
                         [IntrNoMem]>;
+  def int_ppc_altivec_vbpermd : GCCBuiltin<"__builtin_altivec_vbpermd">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty],
+                        [IntrNoMem]>;
 }
 
 def int_ppc_altivec_vexptefp  : PowerPC_Vec_FF_Intrinsic<"vexptefp">;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1518,8 +1518,8 @@
                            (int_ppc_altivec_vprtybq v1i128:$vB))]>;
 
 // Vector (Bit) Permute (Right-indexed)
-def VBPERMD : VXForm_1<1484, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                       "vbpermd $vD, $vA, $vB", IIC_VecFP, []>;
+def VBPERMD : VX1_Int_Ty3<1484, "vbpermd", int_ppc_altivec_vbpermd,
+                          v2i64, v2i64, v16i8>;
 def VPERMR : VAForm_1a<59, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
                        "vpermr $vD, $vA, $vB, $vC", IIC_VecFP, []>;
 
diff --git a/llvm/test/CodeGen/PowerPC/p9-vbpermd.ll b/llvm/test/CodeGen/PowerPC/p9-vbpermd.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p9-vbpermd.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s
+
+@vull = common global <2 x i64> zeroinitializer, align 16
+@vuc = common global <16 x i8> zeroinitializer, align 16
+@res_vull = common global <2 x i64> zeroinitializer, align 16
+
+define void @test1() {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis 3, 2, .LC0@toc@ha
+; CHECK-NEXT:    ld 3, .LC0@toc@l(3)
+; CHECK-NEXT:    lxv 34, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC1@toc@ha
+; CHECK-NEXT:    ld 3, .LC1@toc@l(3)
+; CHECK-NEXT:    lxv 35, 0(3)
+; CHECK-NEXT:    addis 3, 2, .LC2@toc@ha
+; CHECK-NEXT:    ld 3, .LC2@toc@l(3)
+; CHECK-NEXT:    vbpermd 2, 2, 3
+; CHECK-NEXT:    stxv 34, 0(3)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load <2 x i64>, <2 x i64>* @vull, align 16
+  %1 = load <16 x i8>, <16 x i8>* @vuc, align 16
+  %2 = call <2 x i64> @llvm.ppc.altivec.vbpermd(<2 x i64> %0, <16 x i8> %1)
+  store <2 x i64> %2, <2 x i64>* @res_vull, align 16
+  ret void
+}
+declare <2 x i64> @llvm.ppc.altivec.vbpermd(<2 x i64>, <16 x i8>)