diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -302,6 +302,12 @@
 BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "")
 BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "")
 
+// P10 Vector Centrifuge
+BUILTIN(__builtin_altivec_vcfuged, "V2ULLiV2ULLiV2ULLi", "")
+
+// P10 Vector Gather Every N-th Bit
+BUILTIN(__builtin_altivec_vgnb, "ULLiV1ULLLiIi", "")
+
 // VSX built-ins.
 
 BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")
@@ -426,6 +432,8 @@
 BUILTIN(__builtin_vsx_xxpermdi, "v.", "t")
 BUILTIN(__builtin_vsx_xxsldwi, "v.", "t")
 
+BUILTIN(__builtin_vsx_xxeval, "V2ULLiV2ULLiV2ULLiV2ULLiIi", "")
+
 // Float 128 built-ins
 BUILTIN(__builtin_sqrtf128_round_to_odd, "LLdLLd", "")
 BUILTIN(__builtin_addf128_round_to_odd, "LLdLLdLLd", "")
@@ -476,6 +484,7 @@
 BUILTIN(__builtin_bpermd, "SLLiSLLiSLLi", "")
 BUILTIN(__builtin_pdepd, "ULLiULLiULLi", "")
 BUILTIN(__builtin_pextd, "ULLiULLiULLi", "")
+BUILTIN(__builtin_cfuged, "ULLiULLiULLi", "")
 
 // Vector int128 (un)pack
 BUILTIN(__builtin_unpack_vector_int128, "ULLiV1LLLii", "")
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -16776,6 +16776,42 @@
 vec_pext(vector unsigned long long __a, vector unsigned long long __b) {
   return __builtin_altivec_vpextd(__a, __b);
 }
+
+/* vec_cfuge */
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_cfuge(vector unsigned long long __a, vector unsigned long long __b) {
+  return __builtin_altivec_vcfuged(__a, __b);
+}
+
+/* vec_gnb */
+
+#define vec_gnb(__a, __b) __builtin_altivec_vgnb(__a, __b)
+
+/* vec_ternarylogic */
+#ifdef __VSX__
+#define vec_ternarylogic(__a, __b, __c, __imm)                                \
+  _Generic((__a), vector unsigned char                                        \
+           : __builtin_vsx_xxeval((vector unsigned long long)(__a),           \
+                                  (vector unsigned long long)(__b),           \
+                                  (vector unsigned long long)(__c), (__imm)), \
+             vector unsigned short                                            \
+           : __builtin_vsx_xxeval((vector unsigned long long)(__a),           \
+                                  (vector unsigned long long)(__b),           \
+                                  (vector unsigned long long)(__c), (__imm)), \
+             vector unsigned int                                              \
+           : __builtin_vsx_xxeval((vector unsigned long long)(__a),           \
+                                  (vector unsigned long long)(__b),           \
+                                  (vector unsigned long long)(__c), (__imm)), \
+             vector unsigned long long                                        \
+           : __builtin_vsx_xxeval((vector unsigned long long)(__a),           \
+                                  (vector unsigned long long)(__b),           \
+                                  (vector unsigned long long)(__c), (__imm)), \
+             vector unsigned __int128                                         \
+           : __builtin_vsx_xxeval((vector unsigned long long)(__a),           \
+                                  (vector unsigned long long)(__b),           \
+                                  (vector unsigned long long)(__c), (__imm)))
+#endif /* __VSX__ */
 #endif /* __POWER10_VECTOR__ */
 
 #undef __ATTRS_o_ai
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3055,6 +3055,10 @@
            SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
   case PPC::BI__builtin_pack_vector_int128:
     return SemaVSXCheck(TheCall);
+  case PPC::BI__builtin_altivec_vgnb:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 2, 7);
+  case PPC::BI__builtin_vsx_xxeval:
+    return SemaBuiltinConstantArgRange(TheCall, 3, 0, 255);
   }
   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
 }
diff --git a/clang/test/CodeGen/builtins-ppc-p10.c b/clang/test/CodeGen/builtins-ppc-p10.c
--- a/clang/test/CodeGen/builtins-ppc-p10.c
+++ b/clang/test/CodeGen/builtins-ppc-p10.c
@@ -13,3 +13,8 @@
   // CHECK: @llvm.ppc.pextd
   return __builtin_pextd(ulla, ullb);
 }
+
+unsigned long long test_cfuged(void) {
+  // CHECK: @llvm.ppc.cfuged
+  return __builtin_cfuged(ulla, ullb);
+}
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -5,7 +5,13 @@
 
 #include <altivec.h>
 
-vector unsigned long long vulla, vullb;
+vector unsigned int vuia, vuib, vuic;
+vector unsigned long long vulla, vullb, vullc;
+
+vector unsigned char vuca, vucb, vucc;
+vector unsigned short vusa, vusb, vusc;
+
+vector unsigned __int128 vui128a, vui128b, vui128c;
 
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
@@ -18,3 +24,57 @@
   // CHECK-NEXT: ret <2 x i64>
   return vec_pext(vulla, vullb);
 }
+
+vector unsigned long long test_vcfuged(void) {
+  // CHECK: @llvm.ppc.altivec.vcfuged(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_cfuge(vulla, vullb);
+}
+
+unsigned long long test_vgnb_1(void) {
+  // CHECK: @llvm.ppc.altivec.vgnb(<1 x i128> %{{.+}}, i32 2)
+  // CHECK-NEXT: ret i64
+  return vec_gnb(vui128a, 2);
+}
+
+unsigned long long test_vgnb_2(void) {
+  // CHECK: @llvm.ppc.altivec.vgnb(<1 x i128> %{{.+}}, i32 7)
+  // CHECK-NEXT: ret i64
+  return vec_gnb(vui128a, 7);
+}
+
+unsigned long long test_vgnb_3(void) {
+  // CHECK: @llvm.ppc.altivec.vgnb(<1 x i128> %{{.+}}, i32 5)
+  // CHECK-NEXT: ret i64
+  return vec_gnb(vui128a, 5);
+}
+
+vector unsigned char test_xxeval_uc(void) {
+  // CHECK: @llvm.ppc.vsx.xxeval(<2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, i32 0)
+  // CHECK: ret <16 x i8>
+  return vec_ternarylogic(vuca, vucb, vucc, 0);
+}
+
+vector unsigned short test_xxeval_us(void) {
+  // CHECK: @llvm.ppc.vsx.xxeval(<2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, i32 255)
+  // CHECK: ret <8 x i16>
+  return vec_ternarylogic(vusa, vusb, vusc, 255);
+}
+
+vector unsigned int test_xxeval_ui(void) {
+  // CHECK: @llvm.ppc.vsx.xxeval(<2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, i32 150)
+  // CHECK: ret <4 x i32>
+  return vec_ternarylogic(vuia, vuib, vuic, 150);
+}
+
+vector unsigned long long test_xxeval_ull(void) {
+  // CHECK: @llvm.ppc.vsx.xxeval(<2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, i32 1)
+  // CHECK: ret <2 x i64>
+  return vec_ternarylogic(vulla, vullb, vullc, 1);
+}
+
+vector unsigned __int128 test_xxeval_ui128(void) {
+  // CHECK: @llvm.ppc.vsx.xxeval(<2 x i64> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64> %{{.+}}, i32 246)
+  // CHECK: ret <1 x i128>
+  return vec_ternarylogic(vui128a, vui128b, vui128c, 246);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -77,6 +77,11 @@
       : GCCBuiltin<"__builtin_pextd">,
         Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
 
+  // Centrifuge Doubleword Builtin.
+  def int_ppc_cfuged
+      : GCCBuiltin<"__builtin_cfuged">,
+        Intrinsic <[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+
   def int_ppc_truncf128_round_to_odd
       : GCCBuiltin<"__builtin_truncf128_round_to_odd">,
         Intrinsic <[llvm_double_ty], [llvm_f128_ty], [IntrNoMem]>;
@@ -426,6 +431,16 @@
   def int_ppc_altivec_vpextd : GCCBuiltin<"__builtin_altivec_vpextd">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
                         [IntrNoMem]>;
+
+  // P10 Vector Centrifuge Builtin.
+  def int_ppc_altivec_vcfuged : GCCBuiltin<"__builtin_altivec_vcfuged">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                        [IntrNoMem]>;
+
+  // P10 Vector Gather Every Nth Bit Builtin.
+  def int_ppc_altivec_vgnb : GCCBuiltin<"__builtin_altivec_vgnb">,
+              Intrinsic<[llvm_i64_ty], [llvm_v1i128_ty, llvm_i32_ty],
+                        [IntrNoMem, ImmArg<ArgIndex<1>>]>;
 }
 
 // Vector average.
@@ -954,6 +969,11 @@
       PowerPC_VSX_Intrinsic<"xxinsertw",[llvm_v4i32_ty],
                             [llvm_v4i32_ty,llvm_v2i64_ty,llvm_i32_ty],
                             [IntrNoMem]>;
+def int_ppc_vsx_xxeval :
+      PowerPC_VSX_Intrinsic<"xxeval", [llvm_v2i64_ty],
+                            [llvm_v2i64_ty, llvm_v2i64_ty,
+                             llvm_v2i64_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -161,6 +161,54 @@
   let Inst{48-63} = D_RA{15-0}; // d1
 }
 
+class 8RR_XX4Form_IMM8_XTAB6<bits<6> opcode, bits<2> xo,
+                             dag OOL, dag IOL, string asmstr,
+                             InstrItinClass itin, list<dag> pattern>
+  : PI<1, opcode, OOL, IOL, asmstr, itin> {
+  bits<6> XT;
+  bits<6> XA;
+  bits<6> XB;
+  bits<6> XC;
+  bits<8> IMM;
+
+  let Pattern = pattern;
+
+  // The prefix.
+  let Inst{6-7} = 1;
+  let Inst{8} = 0;
+  let Inst{9-11} = 0;
+  let Inst{12-13} = 0;
+  let Inst{14-23} = 0;
+  let Inst{24-31} = IMM;
+
+  // The instruction.
+  let Inst{38-42} = XT{4-0};
+  let Inst{43-47} = XA{4-0};
+  let Inst{48-52} = XB{4-0};
+  let Inst{53-57} = XC{4-0};
+  let Inst{58-59} = xo;
+  let Inst{60} = XC{5};
+  let Inst{61} = XA{5};
+  let Inst{62} = XB{5};
+  let Inst{63} = XT{5};
+}
+
+class VXForm_RD5_N3_VB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+                        InstrItinClass itin, list<dag> pattern>
+  : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> RD;
+  bits<5> VB;
+  bits<3> N;
+
+  let Pattern = pattern;
+
+  let Inst{6-10} = RD;
+  let Inst{11-12} = 0;
+  let Inst{13-15} = N;
+  let Inst{16-20} = VB;
+  let Inst{21-31} = xo;
+}
+
 multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
                                        dag PCRel_IOL, string asmstr,
                                        InstrItinClass itin> {
@@ -516,4 +564,21 @@
   def PEXTD : XForm_6<31, 188, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
                       "pextd $rA, $rS, $rB", IIC_IntGeneral,
                       [(set i64:$rA, (int_ppc_pextd i64:$rS, i64:$rB))]>;
+  def VCFUGED : VXForm_1<1357, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                         "vcfuged $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v2i64:$vD,
+                               (int_ppc_altivec_vcfuged v2i64:$vA, v2i64:$vB))]>;
+  def VGNB : VXForm_RD5_N3_VB5<1228, (outs g8rc:$rD), (ins vrrc:$vB, u3imm:$N),
+                               "vgnb $rD, $vB, $N", IIC_VecGeneral,
+                               [(set i64:$rD,
+                                     (int_ppc_altivec_vgnb v1i128:$vB, timm:$N))]>;
+  def CFUGED : XForm_6<31, 220, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+                       "cfuged $rA, $rS, $rB", IIC_IntGeneral,
+                       [(set i64:$rA, (int_ppc_cfuged i64:$rS, i64:$rB))]>;
+  def XXEVAL :
+    8RR_XX4Form_IMM8_XTAB6<34, 1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
+                           vsrc:$XC, u8imm:$IMM), "xxeval $XT, $XA, $XB, $XC, $IMM",
+                           IIC_VecGeneral,
+                           [(set v2i64:$XT,
+                                 (int_ppc_vsx_xxeval v2i64:$XA, v2i64:$XB, v2i64:$XC, timm:$IMM))]>;
 }
diff --git a/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll b/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll
--- a/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll
@@ -9,6 +9,11 @@
 
 declare <2 x i64> @llvm.ppc.altivec.vpextd(<2 x i64>, <2 x i64>)
 declare i64 @llvm.ppc.pdepd(i64, i64)
 declare i64 @llvm.ppc.pextd(i64, i64)
+declare <2 x i64> @llvm.ppc.altivec.vcfuged(<2 x i64>, <2 x i64>)
+declare i64 @llvm.ppc.cfuged(i64, i64)
+declare i64 @llvm.ppc.altivec.vgnb(<1 x i128>, i32)
+declare <2 x i64> @llvm.ppc.vsx.xxeval(<2 x i64>, <2 x i64>, <2 x i64>, i32)
+
 define <2 x i64> @test_vpdepd(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vpdepd:
@@ -49,3 +54,63 @@
   %tmp = tail call i64 @llvm.ppc.pextd(i64 %a, i64 %b)
   ret i64 %tmp
 }
+
+define <2 x i64> @test_vcfuged(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vcfuged:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vcfuged v2, v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vcfuged(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %tmp
+}
+
+define i64 @test_cfuged(i64 %a, i64 %b) {
+; CHECK-LABEL: test_cfuged:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cfuged r3, r3, r4
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call i64 @llvm.ppc.cfuged(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+define i64 @test_vgnb_1(<1 x i128> %a) {
+; CHECK-LABEL: test_vgnb_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vgnb r3, v2, 2
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call i64 @llvm.ppc.altivec.vgnb(<1 x i128> %a, i32 2)
+  ret i64 %tmp
+}
+
+define i64 @test_vgnb_2(<1 x i128> %a) {
+; CHECK-LABEL: test_vgnb_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vgnb r3, v2, 7
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call i64 @llvm.ppc.altivec.vgnb(<1 x i128> %a, i32 7)
+  ret i64 %tmp
+}
+
+define i64 @test_vgnb_3(<1 x i128> %a) {
+; CHECK-LABEL: test_vgnb_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vgnb r3, v2, 5
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call i64 @llvm.ppc.altivec.vgnb(<1 x i128> %a, i32 5)
+  ret i64 %tmp
+}
+
+define <2 x i64> @test_xxeval(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_xxeval:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 255
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call <2 x i64> @llvm.ppc.vsx.xxeval(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, i32 255)
+  ret <2 x i64> %tmp
+}
diff --git a/llvm/test/MC/Disassembler/PowerPC/p10insts.txt b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt
--- a/llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -12,3 +12,15 @@
 
 # CHECK: pextd 1, 2, 4
 0x7c 0x41 0x21 0x78
+
+# CHECK: vcfuged 1, 2, 4
+0x10 0x22 0x25 0x4d
+
+# CHECK: cfuged 1, 2, 4
+0x7c 0x41 0x21 0xb8
+
+# CHECK: vgnb 1, 2, 2
+0x10 0x22 0x14 0xcc
+
+# CHECK: xxeval 32, 1, 2, 3, 2
+0x05 0x00 0x00 0x02 0x88 0x01 0x10 0xd1
diff --git a/llvm/test/MC/PowerPC/p10.s b/llvm/test/MC/PowerPC/p10.s
--- a/llvm/test/MC/PowerPC/p10.s
+++ b/llvm/test/MC/PowerPC/p10.s
@@ -15,3 +15,18 @@
 # CHECK-BE: pextd 1, 2, 4 # encoding: [0x7c,0x41,0x21,0x78]
 # CHECK-LE: pextd 1, 2, 4 # encoding: [0x78,0x21,0x41,0x7c]
   pextd 1, 2, 4
+# CHECK-BE: vcfuged 1, 2, 4 # encoding: [0x10,0x22,0x25,0x4d]
+# CHECK-LE: vcfuged 1, 2, 4 # encoding: [0x4d,0x25,0x22,0x10]
+  vcfuged 1, 2, 4
+# CHECK-BE: cfuged 1, 2, 4 # encoding: [0x7c,0x41,0x21,0xb8]
+# CHECK-LE: cfuged 1, 2, 4 # encoding: [0xb8,0x21,0x41,0x7c]
+  cfuged 1, 2, 4
+# CHECK-BE: vgnb 1, 2, 2 # encoding: [0x10,0x22,0x14,0xcc]
+# CHECK-LE: vgnb 1, 2, 2 # encoding: [0xcc,0x14,0x22,0x10]
+  vgnb 1, 2, 2
+# CHECK-BE: xxeval 32, 1, 2, 3, 2 # encoding: [0x05,0x00,0x00,0x02,
+# CHECK-BE-SAME: 0x88,0x01,0x10,0xd1]
+# CHECK-LE: xxeval 32, 1, 2, 3, 2 # encoding: [0x02,0x00,0x00,0x05,
+# CHECK-LE-SAME: 0xd1,0x10,0x01,0x88]
+  xxeval 32, 1, 2, 3, 2
+
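Reviewer note, not part of the patch: the sketch below shows how the new user-facing altivec.h intrinsics might be called. It assumes a clang/target where __POWER10_VECTOR__ is defined (e.g. via -mcpu=pwr10 on a sufficiently new compiler); the function names and chosen immediates are illustrative only.

// Hypothetical usage of the new Power10 intrinsics; not part of the patch.
#include <altivec.h>

// vec_cfuge (vcfuged): for each doubleword, bits of src selected by the mask
// are packed to one end and the remaining bits to the other (centrifuge
// operation; see the ISA 3.1 cfuged description for the exact placement).
vector unsigned long long demo_cfuge(vector unsigned long long src,
                                     vector unsigned long long mask) {
  return vec_cfuge(src, mask);
}

// vec_gnb (vgnb): gather every N-th bit of the 128-bit operand into a 64-bit
// result; N must be a compile-time constant in [2, 7], which Sema now checks.
unsigned long long demo_gnb(vector unsigned __int128 src) {
  return vec_gnb(src, 4);
}

// vec_ternarylogic (xxeval): the 8-bit immediate is the truth table of an
// arbitrary three-input bitwise function (0 yields all zeros, 255 all ones);
// it must be a constant in [0, 255], also enforced in Sema.
vector unsigned long long demo_ternarylogic(vector unsigned long long a,
                                            vector unsigned long long b,
                                            vector unsigned long long c) {
  return vec_ternarylogic(a, b, c, 255);
}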