Index: clang/include/clang/Basic/BuiltinsPPC.def =================================================================== --- clang/include/clang/Basic/BuiltinsPPC.def +++ clang/include/clang/Basic/BuiltinsPPC.def @@ -447,6 +447,9 @@ BUILTIN(__builtin_vsx_xxeval, "V2ULLiV2ULLiV2ULLiV2ULLiIi", "") +// P10 Vector Permute Extended built-in. +BUILTIN(__builtin_vsx_xxpermx, "V16UcV16UcV16UcV16UcIi", "") + // Float 128 built-ins BUILTIN(__builtin_sqrtf128_round_to_odd, "LLdLLd", "") BUILTIN(__builtin_addf128_round_to_odd, "LLdLLdLLd", "") Index: clang/lib/Headers/altivec.h =================================================================== --- clang/lib/Headers/altivec.h +++ clang/lib/Headers/altivec.h @@ -16881,6 +16881,42 @@ return __builtin_altivec_vctzdm(__a, __b); } +#ifdef __VSX__ + +/* vec_permx */ + +#define vec_permx(__a, __b, __c, __d) \ + _Generic((__a), vector signed char \ + : (vector signed char)__builtin_vsx_xxpermx( \ + (vector signed char)(__a), (__b), (__c), (__d)), \ + vector unsigned char \ + : (vector unsigned char)__builtin_vsx_xxpermx( \ + (vector unsigned char)(__a), (__b), (__c), (__d)), \ + vector signed short \ + : (vector signed short)__builtin_vsx_xxpermx( \ + (vector signed short)(__a), (__b), (__c), (__d)), \ + vector unsigned short \ + : (vector unsigned short)__builtin_vsx_xxpermx( \ + (vector unsigned short)(__a), (__b), (__c), (__d)), \ + vector signed int \ + : (vector signed int)__builtin_vsx_xxpermx( \ + (vector signed int)(__a), (__b), (__c), (__d)), \ + vector unsigned int \ + : (vector unsigned int)__builtin_vsx_xxpermx( \ + (vector unsigned int)(__a), (__b), (__c), (__d)), \ + vector signed long long \ + : (vector signed long long)__builtin_vsx_xxpermx( \ + (vector signed long long)(__a), (__b), (__c), (__d)), \ + vector unsigned long long \ + : (vector unsigned long long)__builtin_vsx_xxpermx( \ + (vector unsigned long long)(__a), (__b), (__c), (__d)), \ + vector float \ + : (vector float)__builtin_vsx_xxpermx((vector float)(__a), 
(__b), \ + (__c), (__d)), \ + vector double \ + : (vector double)__builtin_vsx_xxpermx((vector double)(__a), (__b), \ + (__c), (__d))) +#endif /* __VSX__ */ #endif /* __POWER10_VECTOR__ */ #undef __ATTRS_o_ai Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -3128,6 +3128,8 @@ return SemaBuiltinConstantArgRange(TheCall, 1, 2, 7); case PPC::BI__builtin_vsx_xxeval: return SemaBuiltinConstantArgRange(TheCall, 3, 0, 255); + case PPC::BI__builtin_vsx_xxpermx: + return SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); } return SemaBuiltinConstantArgRange(TheCall, i, l, u); } Index: clang/test/CodeGen/builtins-ppc-p10vector.c =================================================================== --- clang/test/CodeGen/builtins-ppc-p10vector.c +++ clang/test/CodeGen/builtins-ppc-p10vector.c @@ -5,12 +5,17 @@ #include <altivec.h> -vector signed char vsca; +vector signed char vsca, vscb; vector unsigned char vuca, vucb, vucc; +vector signed short vssa, vssb; vector unsigned short vusa, vusb, vusc; +vector signed int vsia, vsib; vector unsigned int vuia, vuib, vuic; +vector signed long long vslla, vsllb; vector unsigned long long vulla, vullb, vullc; vector unsigned __int128 vui128a, vui128b, vui128c; +vector float vfa, vfb; +vector double vda, vdb; unsigned int uia; vector unsigned long long test_vpdepd(void) { @@ -146,3 +151,71 @@ // CHECK-NEXT: ret <2 x i64> return vec_cnttzm(vulla, vullb); } + +vector signed char test_vec_permx_sc(void) { + // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: ret <16 x i8> + return vec_permx(vsca, vscb, vucc, 0); +} + +vector unsigned char test_vec_permx_uc(void) { + // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: ret <16 x i8> + return vec_permx(vuca, vucb, vucc, 1); +} + +vector signed short 
test_vec_permx_ss(void) { + // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16> + // CHECK-NEXT: ret <8 x i16> + return vec_permx(vssa, vssb, vucc, 2); +} + +vector unsigned short test_vec_permx_us(void) { + // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16> + // CHECK-NEXT: ret <8 x i16> + return vec_permx(vusa, vusb, vucc, 3); +} + +vector signed int test_vec_permx_si(void) { + // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32> + // CHECK-NEXT: ret <4 x i32> + return vec_permx(vsia, vsib, vucc, 4); +} + +vector unsigned int test_vec_permx_ui(void) { + // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32> + // CHECK-NEXT: ret <4 x i32> + return vec_permx(vuia, vuib, vucc, 5); +} + +vector signed long long test_vec_permx_sll(void) { + // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_permx(vslla, vsllb, vucc, 6); +} + +vector unsigned long long test_vec_permx_ull(void) { + // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_permx(vulla, vullb, vucc, 7); +} + +vector float test_vec_permx_f(void) { + // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x float> + // CHECK-NEXT: ret <4 x float> + return vec_permx(vfa, vfb, vucc, 0); +} + +vector double test_vec_permx_d(void) { + // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> 
%{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 + // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x double> + // CHECK-NEXT: ret <2 x double> + return vec_permx(vda, vdb, vucc, 1); +} Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1007,6 +1007,12 @@ def int_ppc_vsx_xxgenpcvdm : PowerPC_VSX_Intrinsic<"xxgenpcvdm", [llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +// P10 VSX Vector permute extended. +def int_ppc_vsx_xxpermx: GCCBuiltin<"__builtin_vsx_xxpermx">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty,llvm_v16i8_ty,llvm_v16i8_ty,llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<3>>]>; } //===----------------------------------------------------------------------===// Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -733,6 +733,14 @@ IIC_VecGeneral, []>; } +let Predicates = [PrefixInstrs] in { + def : Pat<(v16i8 + (int_ppc_vsx_xxpermx v16i8:$A, v16i8:$B, v16i8:$C , timm:$D)), + (COPY_TO_REGCLASS (XXPERMX (COPY_TO_REGCLASS $A, VSRC), + (COPY_TO_REGCLASS $B, VSRC), (COPY_TO_REGCLASS $C, VSRC), $D), + VSRC)>; +} + let Predicates = [IsISA3_1] in { def VSLDBI : VNForm_VTAB5_SD3<22, 0, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH), Index: llvm/test/CodeGen/PowerPC/p10-permute-ops.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/p10-permute-ops.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \ +; RUN: 
-ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s + +define <16 x i8> @testXXPERMX(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: testXXPERMX: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxpermx v2, v2, v3, v4, 1 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.vsx.xxpermx(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 1) + ret <16 x i8> %0 +} +declare <16 x i8> @llvm.ppc.vsx.xxpermx(<16 x i8>, <16 x i8>, <16 x i8>, i32 immarg)