Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -446,6 +446,11 @@
 BUILTIN(__builtin_vsx_scalar_extract_expq, "ULLiLLd", "")
 BUILTIN(__builtin_vsx_scalar_insert_exp_qp, "LLdLLdULLi", "")
 
+// P10 Vector Splat built-ins.
+BUILTIN(__builtin_vsx_xxspltiw, "V4UiIi", "")
+BUILTIN(__builtin_vsx_xxspltidp, "V2dIi", "")
+BUILTIN(__builtin_vsx_xxsplti32dx, "V4UiV4UiIiIi", "")
+
 // HTM builtins
 BUILTIN(__builtin_tbegin, "UiUIi", "")
 BUILTIN(__builtin_tend, "UiUIi", "")
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -16830,6 +16830,68 @@
   return __builtin_altivec_vclrrb((vector signed char)__a, __n);
 #endif
 }
+
+#ifdef __VSX__
+
+/* vec_splati */
+
+/* Each use of the argument is parenthesized so that an expression argument
+ * is cast as a whole. */
+#define vec_splati(__a)                                                        \
+  _Generic((__a), signed int                                                   \
+           : ((vector signed int)(__a)), unsigned int                          \
+           : ((vector unsigned int)(__a)), float                               \
+           : ((vector float)(__a)))
+
+/* vec_splatid */
+
+static __inline__ vector double __ATTRS_o_ai vec_splatid(const float __a) {
+  return ((vector double)((double)__a));
+}
+
+/* vec_splati_ins */
+
+/* Writes __c into one element of each element pair ({0,1} and {2,3}) of __a.
+ * Only the low bit of __b is used to pick the element, so the index stays in
+ * range for any __b and the header needs no assert().
+ * The element order is mirrored on little-endian targets. */
+static __inline__ vector signed int __ATTRS_o_ai vec_splati_ins(
+    vector signed int __a, const unsigned int __b, const signed int __c) {
+  const unsigned int __d = __b & 0x01;
+#ifdef __LITTLE_ENDIAN__
+  __a[1 - __d] = __c;
+  __a[3 - __d] = __c;
+#else
+  __a[__d] = __c;
+  __a[2 + __d] = __c;
+#endif
+  return __a;
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai vec_splati_ins(
+    vector unsigned int __a, const unsigned int __b, const unsigned int __c) {
+  const unsigned int __d = __b & 0x01;
+#ifdef __LITTLE_ENDIAN__
+  __a[1 - __d] = __c;
+  __a[3 - __d] = __c;
+#else
+  __a[__d] = __c;
+  __a[2 + __d] = __c;
+#endif
+  return __a;
+}
+
+static __inline__ vector float __ATTRS_o_ai
+vec_splati_ins(vector float __a, const unsigned int __b, const float __c) {
+  const unsigned int __d = __b & 0x01;
+#ifdef __LITTLE_ENDIAN__
+  __a[1 - __d] = __c;
+  __a[3 - __d] = __c;
+#else
+  __a[__d] = __c;
+  __a[2 + __d] = __c;
+#endif
+  return __a;
+}
+#endif /* __VSX__ */
 #endif /* __POWER10_VECTOR__ */
 
 #undef __ATTRS_o_ai
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -3124,6 +3124,8 @@
            SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
   case PPC::BI__builtin_pack_vector_int128:
     return SemaVSXCheck(TheCall);
+  case PPC::BI__builtin_vsx_xxsplti32dx:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
   }
   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
 }
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -10,6 +10,8 @@
 vector unsigned short vusa;
 vector unsigned int vuia;
 vector unsigned long long vulla, vullb;
+vector signed int vsia;
+vector float vfa;
 unsigned int uia;
 
 vector unsigned long long test_vpdepd(void) {
@@ -79,3 +81,47 @@
   // CHECK-LE-NEXT: ret <16 x i8>
   return vec_clrr(vuca, uia);
 }
+
+vector signed int test_vec_vec_splati_si(void) {
+  // CHECK: ret <4 x i32>
+  return vec_splati(-1);
+}
+
+vector unsigned int test_vec_vec_splati_ui(void) {
+  // CHECK: ret <4 x i32>
+  return vec_splati(1U);
+}
+
+vector float test_vec_vec_splati_f(void) {
+  // CHECK: ret <4 x float>
+  return vec_splati(1.0f);
+}
+
+vector double test_vec_vec_splatid(void) {
+  // CHECK: ret <2 x double>
+  return vec_splatid(1.0);
+}
+
+vector signed int test_vec_vec_splati_ins_si(void) {
+  // CHECK-BE: insertelement <4 x i32>
+  // CHECK-BE: ret <4 x i32>
+  // CHECK-LE: insertelement <4 x i32>
+  // CHECK-LE: ret <4 x i32>
+  return vec_splati_ins(vsia, 0, 1);
+}
+
+vector unsigned int test_vec_vec_splati_ins_ui(void) {
+  // CHECK-BE: insertelement <4 x i32>
+  // CHECK-BE: ret <4 x i32>
+  // CHECK-LE: insertelement <4 x i32>
+  // CHECK-LE: ret <4 x i32>
+  return vec_splati_ins(vuia, 0, (unsigned int)1);
+}
+
+vector float test_vec_vec_splati_ins_f(void) {
+  // CHECK-BE: insertelement <4 x float>
+  // CHECK-BE: ret <4 x float>
+  // CHECK-LE: insertelement <4 x float>
+  // CHECK-LE: ret <4 x float>
+  return vec_splati_ins(vfa, 0, 1.0);
+}
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -965,6 +965,16 @@
 def int_ppc_vsx_xxgenpcvdm :
       PowerPC_VSX_Intrinsic<"xxgenpcvdm", [llvm_v2i64_ty],
                             [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+// P10 VSX Vector Splat.
+def int_ppc_vsx_xxspltiw : GCCBuiltin<"__builtin_vsx_xxspltiw">,
+    Intrinsic<[llvm_v4i32_ty],[llvm_i32_ty],
+              [IntrNoMem, ImmArg<ArgIndex<0>>]>;
+def int_ppc_vsx_xxspltidp: GCCBuiltin<"__builtin_vsx_xxspltidp">,
+    Intrinsic<[llvm_v2f64_ty],[llvm_i32_ty],
+              [IntrNoMem, ImmArg<ArgIndex<0>>]>;
+def int_ppc_vsx_xxsplti32dx: GCCBuiltin<"__builtin_vsx_xxsplti32dx">,
+    Intrinsic<[llvm_v4i32_ty],[llvm_v4i32_ty,llvm_i1_ty,llvm_i32_ty],
+              [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
 }
 
 //===----------------------------------------------------------------------===//
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -97,6 +97,11 @@
     ///
     XXSPLT,
 
+    /// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for
+    /// converting immediate single precision numbers to double precision
+    /// vector or scalar.
+    XXSPLTI_SP_TO_DP,
+
     /// VECINSERT - The PPC vector insert instruction
     ///
     VECINSERT,
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1474,6 +1474,8 @@
   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
   case PPCISD::VPERM:           return "PPCISD::VPERM";
   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
+  case PPCISD::XXSPLTI_SP_TO_DP:
+    return "PPCISD::XXSPLTI_SP_TO_DP";
   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -50,6 +50,10 @@
   SDTCisVec<1>, SDTCisInt<2>
 ]>;
 
+def SDT_PPCSpToDp : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>,
+                                          SDTCisInt<1>
+]>;
+
 def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
   SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3>
 ]>;
@@ -194,6 +198,7 @@
 
 def PPCvperm     : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
 def PPCxxsplt    : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>;
+def PPCxxspltidp : SDNode<"PPCISD::XXSPLTI_SP_TO_DP", SDT_PPCSpToDp, []>;
 def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>;
 def PPCxxpermdi  : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>;
 def PPCvecshl    : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>;
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -177,6 +177,58 @@
   let Inst{31} = XT{5};
 }
 
+// 8RR:D-Form: [ 1 1 0 // // imm0
+//               PO T XO TX imm1 ].
+class 8RR_DForm_IMM32_XT6<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
+                          string asmstr, InstrItinClass itin,
+                          list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<6> XT;
+  bits<32> IMM32;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 1;
+  let Inst{8-11} = 0;
+  let Inst{12-13} = 0; // reserved
+  let Inst{14-15} = 0; // reserved
+  let Inst{16-31} = IMM32{31-16};
+
+  // The instruction.
+  let Inst{38-42} = XT{4-0};
+  let Inst{43-46} = xo;
+  let Inst{47} = XT{5};
+  let Inst{48-63} = IMM32{15-0};
+}
+
+// 8RR:D-Form: [ 1 1 0 // // imm0
+//               PO T XO IX TX imm1 ].
+class 8RR_DForm_IMM32_XT6_IX<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
+                             string asmstr, InstrItinClass itin,
+                             list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<6> XT;
+  bit IX;
+  bits<32> IMM32;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 1;
+  let Inst{8-11} = 0;
+  let Inst{12-13} = 0; // reserved
+  let Inst{14-15} = 0; // reserved
+  let Inst{16-31} = IMM32{31-16};
+
+  // The instruction.
+  let Inst{38-42} = XT{4-0};
+  let Inst{43-45} = xo;
+  let Inst{46} = IX;
+  let Inst{47} = XT{5};
+  let Inst{48-63} = IMM32{15-0};
+}
+
 multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
                                        dag PCRel_IOL, string asmstr,
                                        InstrItinClass itin> {
@@ -517,6 +569,30 @@
   def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
 }
 
+let Predicates = [PrefixInstrs] in {
+  def XXSPLTIW : 8RR_DForm_IMM32_XT6<32, 3, (outs vsrc:$XT),
+                                     (ins i32imm:$IMM32),
+                                     "xxspltiw $XT, $IMM32", IIC_VecGeneral,
+                                     []>;
+  def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT),
+                                      (ins i32imm:$IMM32),
+                                      "xxspltidp $XT, $IMM32", IIC_VecGeneral,
+                                      [(set v2f64:$XT,
+                                            (PPCxxspltidp i32:$IMM32))]>;
+  def XXSPLTI32DX :
+      8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
+                             (ins vsrc:$XTi, i1imm:$IX, i32imm:$IMM32),
+                             "xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral, []>,
+                             RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+
+  def : Pat<(int_ppc_vsx_xxspltiw timm:$A),
+            (XXSPLTIW $A)>;
+  def : Pat<(int_ppc_vsx_xxspltidp timm:$A),
+            (XXSPLTIDP $A)>;
+  def : Pat<(int_ppc_vsx_xxsplti32dx v4i32:$A, i1:$B, timm:$C),
+            (XXSPLTI32DX $A, $B, $C)>;
+}
+
 let Predicates = [IsISA3_1] in {
   def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                         "vpdepd $vD, $vA, $vB", IIC_VecGeneral,
Index: llvm/test/CodeGen/PowerPC/p10-permute-ops.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-permute-ops.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
+
+
+define <4 x i32> @testXXSPLTIW() {
+; CHECK-LABEL: testXXSPLTIW:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltiw v2, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <4 x i32> @llvm.ppc.vsx.xxspltiw(i32 1)
+  ret <4 x i32> %0
+}
+declare <4 x i32> @llvm.ppc.vsx.xxspltiw(i32 immarg)
+
+define <2 x double> @testXXSPLTIDP() {
+; CHECK-LABEL: testXXSPLTIDP:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxspltidp v2, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <2 x double> @llvm.ppc.vsx.xxspltidp(i32 1)
+  ret <2 x double> %0
+}
+declare <2 x double> @llvm.ppc.vsx.xxspltidp(i32 immarg)
+
+define <4 x i32> @testXXSPLTI32DX(<4 x i32> %a) {
+; CHECK-LABEL: testXXSPLTI32DX:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx v2, -1, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <4 x i32> @llvm.ppc.vsx.xxsplti32dx(<4 x i32> %a, i1 true, i32 1)
+  ret <4 x i32> %0
+}
+declare <4 x i32> @llvm.ppc.vsx.xxsplti32dx(<4 x i32>, i1 immarg, i32 immarg)
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===================================================================
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -30,3 +30,26 @@
 
 # CHECK: vclrrb 1, 4, 3
 0x10 0x24 0x19 0xcd
+
+# Boundary conditions of 8RR_DForm_IMM32_XT6's immediates
+# CHECK: xxspltiw 63, 4294901760
+0x05 0x00 0xff 0xff 0x83 0xe7 0x00 0x00
+
+# CHECK: xxspltiw 63, 65535
+0x05 0x00 0x00 0x00 0x83 0xe7 0xff 0xff
+
+# CHECK: xxspltiw 63, 4294967295
+0x05 0x00 0xff 0xff 0x83 0xe7 0xff 0xff
+
+# CHECK: xxspltidp 63, 4294967295
+0x05 0x00 0xff 0xff 0x83 0xe5 0xff 0xff
+
+# Boundary conditions of 8RR_DForm_IMM32_XT6_IX's immediates
+# CHECK: xxsplti32dx 63, 1, 4294901760
+0x05 0x00 0xff 0xff 0x83 0xe3 0x00 0x00
+
+# CHECK: xxsplti32dx 63, 1, 65535
+0x05 0x00 0x00 0x00 0x83 0xe3 0xff 0xff
+
+# CHECK: xxsplti32dx 63, 1, 4294967295
+0x05 0x00 0xff 0xff 0x83 0xe3 0xff 0xff
Index: llvm/test/MC/PowerPC/p10.s
===================================================================
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -33,3 +33,50 @@
 # CHECK-BE: vclrrb 1, 4, 3                        # encoding: [0x10,0x24,0x19,0xcd]
 # CHECK-LE: vclrrb 1, 4, 3                        # encoding: [0xcd,0x19,0x24,0x10]
             vclrrb 1, 4, 3
+# Boundary conditions of 8RR_DForm_IMM32_XT6's immediates
+# CHECK-BE: xxspltiw 63, 4294901760               # encoding: [0x05,0x00,0xff,0xff,
+# CHECK-BE-SAME:                                               0x83,0xe7,0x00,0x00]
+# CHECK-LE: xxspltiw 63, 4294901760               # encoding: [0xff,0xff,0x00,0x05,
+# CHECK-LE-SAME:                                               0x00,0x00,0xe7,0x83]
+            xxspltiw 63, 4294901760
+# CHECK-BE: xxspltiw 63, 65535                    # encoding: [0x05,0x00,0x00,0x00,
+# CHECK-BE-SAME:                                               0x83,0xe7,0xff,0xff]
+# CHECK-LE: xxspltiw 63, 65535                    # encoding: [0x00,0x00,0x00,0x05,
+# CHECK-LE-SAME:                                               0xff,0xff,0xe7,0x83]
+            xxspltiw 63, 65535
+# CHECK-BE: xxspltiw 63, 4294967295               # encoding: [0x05,0x00,0xff,0xff,
+# CHECK-BE-SAME:                                               0x83,0xe7,0xff,0xff]
+# CHECK-LE: xxspltiw 63, 4294967295               # encoding: [0xff,0xff,0x00,0x05,
+# CHECK-LE-SAME:                                               0xff,0xff,0xe7,0x83]
+            xxspltiw 63, 4294967295
+# CHECK-BE: xxspltiw 63, -1                       # encoding: [0x05,0x00,0xff,0xff,
+# CHECK-BE-SAME:                                               0x83,0xe7,0xff,0xff]
+# CHECK-LE: xxspltiw 63, -1                       # encoding: [0xff,0xff,0x00,0x05,
+# CHECK-LE-SAME:                                               0xff,0xff,0xe7,0x83]
+            xxspltiw 63, -1
+# CHECK-BE: xxspltidp 63, 4294967295              # encoding: [0x05,0x00,0xff,0xff,
+# CHECK-BE-SAME:                                               0x83,0xe5,0xff,0xff]
+# CHECK-LE: xxspltidp 63, 4294967295              # encoding: [0xff,0xff,0x00,0x05,
+# CHECK-LE-SAME:                                               0xff,0xff,0xe5,0x83]
+            xxspltidp 63, 4294967295
+# Boundary conditions of 8RR_DForm_IMM32_XT6_IX's immediates
+# CHECK-BE: xxsplti32dx 63, 1, 4294901760         # encoding: [0x05,0x00,0xff,0xff,
+# CHECK-BE-SAME:                                               0x83,0xe3,0x00,0x00]
+# CHECK-LE: xxsplti32dx 63, 1, 4294901760         # encoding: [0xff,0xff,0x00,0x05,
+# CHECK-LE-SAME:                                               0x00,0x00,0xe3,0x83]
+            xxsplti32dx 63, 1, 4294901760
+# CHECK-BE: xxsplti32dx 63, 1, 65535              # encoding: [0x05,0x00,0x00,0x00,
+# CHECK-BE-SAME:                                               0x83,0xe3,0xff,0xff]
+# CHECK-LE: xxsplti32dx 63, 1, 65535              # encoding: [0x00,0x00,0x00,0x05,
+# CHECK-LE-SAME:                                               0xff,0xff,0xe3,0x83]
+            xxsplti32dx 63, 1, 65535
+# CHECK-BE: xxsplti32dx 63, 1, 4294967295         # encoding: [0x05,0x00,0xff,0xff,
+# CHECK-BE-SAME:                                               0x83,0xe3,0xff,0xff]
+# CHECK-LE: xxsplti32dx 63, 1, 4294967295         # encoding: [0xff,0xff,0x00,0x05,
+# CHECK-LE-SAME:                                               0xff,0xff,0xe3,0x83]
+            xxsplti32dx 63, 1, 4294967295
+# CHECK-BE: xxsplti32dx 63, 1, -1                 # encoding: [0x05,0x00,0xff,0xff,
+# CHECK-BE-SAME:                                               0x83,0xe3,0xff,0xff]
+# CHECK-LE: xxsplti32dx 63, 1, -1                 # encoding: [0xff,0xff,0x00,0x05,
+# CHECK-LE-SAME:                                               0xff,0xff,0xe3,0x83]
+            xxsplti32dx 63, 1, -1