[PowerPC] Add Power10 VSX vector blend (xxblendvb/h/w/d) support.

This patch
  * declares the __builtin_vsx_xxblendv{b,h,w,d} Clang builtins and the
    matching llvm.ppc.vsx.xxblendv{b,h,w,d} intrinsics,
  * adds the vec_blendv overloads to altivec.h (guarded by
    __POWER10_VECTOR__ and __VSX__),
  * defines the prefixed XXBLENDV{B,H,W,D} instructions together with their
    selection patterns, and
  * adds Clang codegen, llc, assembler and disassembler tests.

Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -435,6 +435,12 @@
 BUILTIN(__builtin_vsx_xxpermdi, "v.", "t")
 BUILTIN(__builtin_vsx_xxsldwi, "v.", "t")
 
+// P10 Vector Blend built-ins.
+BUILTIN(__builtin_vsx_xxblendvb, "V16UcV16UcV16UcV16Uc", "")
+BUILTIN(__builtin_vsx_xxblendvh, "V8UsV8UsV8UsV8Us", "")
+BUILTIN(__builtin_vsx_xxblendvw, "V4UiV4UiV4UiV4Ui", "")
+BUILTIN(__builtin_vsx_xxblendvd, "V2ULLiV2ULLiV2ULLiV2ULLi", "")
+
 // Float 128 built-ins
 BUILTIN(__builtin_sqrtf128_round_to_odd, "LLdLLd", "")
 BUILTIN(__builtin_addf128_round_to_odd, "LLdLLdLLd", "")
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -16830,6 +16830,76 @@
   return __builtin_altivec_vclrrb((vector signed char)__a, __n);
 #endif
 }
+
+#ifdef __VSX__
+
+/* vec_blendv */
+
+/* The xxblendv builtins are declared on unsigned-element vectors only, so the
+ * signed and floating-point overloads cast explicitly instead of relying on
+ * lax vector conversions. */
+
+static __inline__ vector signed char __ATTRS_o_ai vec_blendv(
+    vector signed char __a, vector signed char __b, vector unsigned char __c) {
+  return (vector signed char)__builtin_vsx_xxblendvb(
+      (vector unsigned char)__a, (vector unsigned char)__b, __c);
+}
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_blendv(vector unsigned char __a, vector unsigned char __b,
+           vector unsigned char __c) {
+  return __builtin_vsx_xxblendvb(__a, __b, __c);
+}
+
+static __inline__ vector signed short __ATTRS_o_ai
+vec_blendv(vector signed short __a, vector signed short __b,
+           vector unsigned short __c) {
+  return (vector signed short)__builtin_vsx_xxblendvh(
+      (vector unsigned short)__a, (vector unsigned short)__b, __c);
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_blendv(vector unsigned short __a, vector unsigned short __b,
+           vector unsigned short __c) {
+  return __builtin_vsx_xxblendvh(__a, __b, __c);
+}
+
+static __inline__ vector signed int __ATTRS_o_ai vec_blendv(
+    vector signed int __a, vector signed int __b, vector unsigned int __c) {
+  return (vector signed int)__builtin_vsx_xxblendvw(
+      (vector unsigned int)__a, (vector unsigned int)__b, __c);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai vec_blendv(
+    vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) {
+  return __builtin_vsx_xxblendvw(__a, __b, __c);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_blendv(vector signed long long __a, vector signed long long __b,
+           vector unsigned long long __c) {
+  return (vector signed long long)__builtin_vsx_xxblendvd(
+      (vector unsigned long long)__a, (vector unsigned long long)__b, __c);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_blendv(vector unsigned long long __a, vector unsigned long long __b,
+           vector unsigned long long __c) {
+  return __builtin_vsx_xxblendvd(__a, __b, __c);
+}
+
+static __inline__ vector float __ATTRS_o_ai
+vec_blendv(vector float __a, vector float __b, vector unsigned int __c) {
+  return (vector float)__builtin_vsx_xxblendvw((vector unsigned int)__a,
+                                               (vector unsigned int)__b, __c);
+}
+
+static __inline__ vector double __ATTRS_o_ai vec_blendv(
+    vector double __a, vector double __b, vector unsigned long long __c) {
+  return (vector double)__builtin_vsx_xxblendvd(
+      (vector unsigned long long)__a, (vector unsigned long long)__b, __c);
+}
+#endif /* __VSX__ */
 #endif /* __POWER10_VECTOR__ */
 
 #undef __ATTRS_o_ai
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -5,11 +5,16 @@
 
 #include <altivec.h>
 
-vector signed char vsca;
-vector unsigned char vuca;
-vector unsigned short vusa;
-vector unsigned int vuia;
-vector unsigned long long vulla, vullb;
+vector signed char vsca, vscb;
+vector unsigned char vuca, vucb, vucc;
+vector signed short vssa, vssb;
+vector unsigned short vusa, vusb, vusc;
+vector signed int vsia, vsib;
+vector unsigned int vuia, vuib, vuic;
+vector signed long long vslla, vsllb;
+vector unsigned long long vulla, vullb, vullc;
+vector float vfa, vfb;
+vector double vda, vdb;
 unsigned int uia;
 
 vector unsigned long long test_vpdepd(void) {
@@ -79,3 +84,65 @@
   // CHECK-LE-NEXT: ret <16 x i8>
   return vec_clrr(vuca, uia);
 }
+
+vector signed char test_vec_blend_sc(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvb(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_blendv(vsca, vscb, vucc);
+}
+
+vector unsigned char test_vec_blend_uc(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvb(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_blendv(vuca, vucb, vucc);
+}
+
+vector signed short test_vec_blend_ss(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvh(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_blendv(vssa, vssb, vusc);
+}
+
+vector unsigned short test_vec_blend_us(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvh(<8 x i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_blendv(vusa, vusb, vusc);
+}
+
+vector signed int test_vec_blend_si(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}}, <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_blendv(vsia, vsib, vuic);
+}
+
+vector unsigned int test_vec_blend_ui(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}}, <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_blendv(vuia, vuib, vuic);
+}
+
+vector signed long long test_vec_blend_sll(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvd(<2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_blendv(vslla, vsllb, vullc);
+}
+
+vector unsigned long long test_vec_blend_ull(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvd(<2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_blendv(vulla, vullb, vullc);
+}
+
+vector float test_vec_blend_f(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}}, <4 x i32>
+  // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-NEXT: ret <4 x float>
+  return vec_blendv(vfa, vfb, vuic);
+}
+
+vector double test_vec_blend_d(void) {
+  // CHECK: @llvm.ppc.vsx.xxblendvd(<2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64>
+  // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-NEXT: ret <2 x double>
+  return vec_blendv(vda, vdb, vullc);
+}
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -965,6 +965,20 @@
 def int_ppc_vsx_xxgenpcvdm :
     PowerPC_VSX_Intrinsic<"xxgenpcvdm", [llvm_v2i64_ty],
                           [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+// P10 VSX Vector Blend Variable.
+def int_ppc_vsx_xxblendvb : GCCBuiltin<"__builtin_vsx_xxblendvb">,
+    Intrinsic<[llvm_v16i8_ty],
+              [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+def int_ppc_vsx_xxblendvh : GCCBuiltin<"__builtin_vsx_xxblendvh">,
+    Intrinsic<[llvm_v8i16_ty],
+              [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+def int_ppc_vsx_xxblendvw : GCCBuiltin<"__builtin_vsx_xxblendvw">,
+    Intrinsic<[llvm_v4i32_ty],
+              [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xxblendvd : GCCBuiltin<"__builtin_vsx_xxblendvd">,
+    Intrinsic<[llvm_v2i64_ty],
+              [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
 }
 
 //===----------------------------------------------------------------------===//
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -177,6 +177,36 @@
   let Inst{31} = XT{5};
 }
 
+// Prefixed instruction format with four 6-bit register operands (XT, XA, XB,
+// XC) and a 2-bit extended opcode (xo) placed in bits 58-59.
+class 8RR_XX4_XTABC6<bits<6> opcode, bits<2> xo, dag OOL, dag IOL,
+                     string asmstr, InstrItinClass itin, list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<6> XT;
+  bits<6> XA;
+  bits<6> XB;
+  bits<6> XC;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 1;
+  let Inst{8-11} = 0;
+  let Inst{12-13} = 0;
+  let Inst{14-31} = 0;
+
+  // The instruction.
+  let Inst{38-42} = XT{4-0};
+  let Inst{43-47} = XA{4-0};
+  let Inst{48-52} = XB{4-0};
+  let Inst{53-57} = XC{4-0};
+  let Inst{58-59} = xo;
+  let Inst{60} = XC{5};
+  let Inst{61} = XA{5};
+  let Inst{62} = XB{5};
+  let Inst{63} = XT{5};
+}
+
 multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
                                        dag PCRel_IOL, string asmstr,
                                        InstrItinClass itin> {
@@ -517,6 +547,34 @@
   def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
 }
 
+let Predicates = [PrefixInstrs] in {
+  def XXBLENDVB :
+    8RR_XX4_XTABC6<33, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
+                   "xxblendvb $XT, $XA, $XB, $XC", IIC_VecGeneral, []>;
+  def XXBLENDVH :
+    8RR_XX4_XTABC6<33, 1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
+                   "xxblendvh $XT, $XA, $XB, $XC", IIC_VecGeneral, []>;
+  def XXBLENDVW :
+    8RR_XX4_XTABC6<33, 2, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
+                   "xxblendvw $XT, $XA, $XB, $XC", IIC_VecGeneral, []>;
+  def XXBLENDVD :
+    8RR_XX4_XTABC6<33, 3, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
+                   "xxblendvd $XT, $XA, $XB, $XC", IIC_VecGeneral, []>;
+}
+
+let Predicates = [PrefixInstrs] in {
+  def : Pat<(v16i8 (int_ppc_vsx_xxblendvb v16i8:$A, v16i8:$B, v16i8:$C)),
+            (COPY_TO_REGCLASS (XXBLENDVB (COPY_TO_REGCLASS $A, VSRC),
+            (COPY_TO_REGCLASS $B, VSRC), (COPY_TO_REGCLASS $C, VSRC)), VSRC)>;
+  def : Pat<(v8i16 (int_ppc_vsx_xxblendvh v8i16:$A, v8i16:$B, v8i16:$C)),
+            (COPY_TO_REGCLASS (XXBLENDVH (COPY_TO_REGCLASS $A, VSRC),
+            (COPY_TO_REGCLASS $B, VSRC), (COPY_TO_REGCLASS $C, VSRC)), VSRC)>;
+  def : Pat<(int_ppc_vsx_xxblendvw v4i32:$A, v4i32:$B, v4i32:$C),
+            (XXBLENDVW $A, $B, $C)>;
+  def : Pat<(int_ppc_vsx_xxblendvd v2i64:$A, v2i64:$B, v2i64:$C),
+            (XXBLENDVD $A, $B, $C)>;
+}
+
 let Predicates = [IsISA3_1] in {
   def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                         "vpdepd $vD, $vA, $vB", IIC_VecGeneral,
Index: llvm/test/CodeGen/PowerPC/p10-permute-ops.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-permute-ops.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
+
+define <16 x i8> @testXXBLENDVB(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: testXXBLENDVB:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxblendvb v2, v2, v3, v4
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.vsx.xxblendvb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+  ret <16 x i8> %0
+}
+declare <16 x i8> @llvm.ppc.vsx.xxblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
+
+define <8 x i16> @testXXBLENDVH(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: testXXBLENDVH:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxblendvh v2, v2, v3, v4
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <8 x i16> @llvm.ppc.vsx.xxblendvh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
+  ret <8 x i16> %0
+}
+declare <8 x i16> @llvm.ppc.vsx.xxblendvh(<8 x i16>, <8 x i16>, <8 x i16>)
+
+define <4 x i32> @testXXBLENDVW(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: testXXBLENDVW:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxblendvw v2, v2, v3, v4
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <4 x i32> @llvm.ppc.vsx.xxblendvw(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %0
+}
+declare <4 x i32> @llvm.ppc.vsx.xxblendvw(<4 x i32>, <4 x i32>, <4 x i32>)
+
+define <2 x i64> @testXXBLENDVD(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: testXXBLENDVD:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxblendvd v2, v2, v3, v4
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <2 x i64> @llvm.ppc.vsx.xxblendvd(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %0
+}
+declare <2 x i64> @llvm.ppc.vsx.xxblendvd(<2 x i64>, <2 x i64>, <2 x i64>)
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===================================================================
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -30,3 +30,15 @@
 
 # CHECK: vclrrb 1, 4, 3
 0x10 0x24 0x19 0xcd
+
+# CHECK: xxblendvb 6, 63, 21, 34
+0x05 0x00 0x00 0x00 0x84 0xdf 0xa8 0x8c
+
+# CHECK: xxblendvh 6, 63, 21, 34
+0x05 0x00 0x00 0x00 0x84 0xdf 0xa8 0x9c
+
+# CHECK: xxblendvw 6, 63, 21, 34
+0x05 0x00 0x00 0x00 0x84 0xdf 0xa8 0xac
+
+# CHECK: xxblendvd 6, 63, 21, 34
+0x05 0x00 0x00 0x00 0x84 0xdf 0xa8 0xbc
Index: llvm/test/MC/PowerPC/p10.s
===================================================================
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -33,3 +33,23 @@
 # CHECK-BE: vclrrb 1, 4, 3                        # encoding: [0x10,0x24,0x19,0xcd]
 # CHECK-LE: vclrrb 1, 4, 3                        # encoding: [0xcd,0x19,0x24,0x10]
             vclrrb 1, 4, 3
+# CHECK-BE: xxblendvb 6, 63, 21, 34               # encoding: [0x05,0x00,0x00,0x00,
+# CHECK-BE-SAME:                                               0x84,0xdf,0xa8,0x8c]
+# CHECK-LE: xxblendvb 6, 63, 21, 34               # encoding: [0x00,0x00,0x00,0x05,
+# CHECK-LE-SAME:                                               0x8c,0xa8,0xdf,0x84]
+            xxblendvb 6, 63, 21, 34
+# CHECK-BE: xxblendvh 6, 63, 21, 34               # encoding: [0x05,0x00,0x00,0x00,
+# CHECK-BE-SAME:                                               0x84,0xdf,0xa8,0x9c]
+# CHECK-LE: xxblendvh 6, 63, 21, 34               # encoding: [0x00,0x00,0x00,0x05,
+# CHECK-LE-SAME:                                               0x9c,0xa8,0xdf,0x84]
+            xxblendvh 6, 63, 21, 34
+# CHECK-BE: xxblendvw 6, 63, 21, 34               # encoding: [0x05,0x00,0x00,0x00,
+# CHECK-BE-SAME:                                               0x84,0xdf,0xa8,0xac]
+# CHECK-LE: xxblendvw 6, 63, 21, 34               # encoding: [0x00,0x00,0x00,0x05,
+# CHECK-LE-SAME:                                               0xac,0xa8,0xdf,0x84]
+            xxblendvw 6, 63, 21, 34
+# CHECK-BE: xxblendvd 6, 63, 21, 34               # encoding: [0x05,0x00,0x00,0x00,
+# CHECK-BE-SAME:                                               0x84,0xdf,0xa8,0xbc]
+# CHECK-LE: xxblendvd 6, 63, 21, 34               # encoding: [0x00,0x00,0x00,0x05,
+# CHECK-LE-SAME:                                               0xbc,0xa8,0xdf,0x84]
+            xxblendvd 6, 63, 21, 34