diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -329,6 +329,12 @@ BUILTIN(__builtin_altivec_vexpanddm, "V2ULLiV2ULLi", "") BUILTIN(__builtin_altivec_vexpandqm, "V1ULLLiV1ULLLi", "") +// P10 Vector Count with Mask built-ins. +BUILTIN(__builtin_altivec_vcntmbb, "ULLiV16UcUi", "") +BUILTIN(__builtin_altivec_vcntmbh, "ULLiV8UsUi", "") +BUILTIN(__builtin_altivec_vcntmbw, "ULLiV4UiUi", "") +BUILTIN(__builtin_altivec_vcntmbd, "ULLiV2ULLiUi", "") + // P10 Vector Parallel Bits built-ins. BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "") BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "") diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -17080,6 +17080,18 @@ return __builtin_altivec_vexpandqm(__a); } +/* vec_cntm */ + +#define vec_cntm(__a, __mp) \ + _Generic((__a), vector unsigned char \ + : __builtin_altivec_vcntmbb((__a), (unsigned int)(__mp)), \ + vector unsigned short \ + : __builtin_altivec_vcntmbh((__a), (unsigned int)(__mp)), \ + vector unsigned int \ + : __builtin_altivec_vcntmbw((__a), (unsigned int)(__mp)), \ + vector unsigned long long \ + : __builtin_altivec_vcntmbd((__a), (unsigned int)(__mp))) + /* vec_pdep */ static __inline__ vector unsigned long long __ATTRS_o_ai diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c --- a/clang/test/CodeGen/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -244,6 +244,30 @@ return vec_expandm(vui128a); } +unsigned long long test_vec_cntm_uc(void) { + // CHECK: @llvm.ppc.altivec.vcntmbb(<16 x i8> %{{.+}}, i32 + // CHECK-NEXT: ret i64 + return vec_cntm(vuca, 1); +} + +unsigned long long test_vec_cntm_us(void) { + // CHECK: @llvm.ppc.altivec.vcntmbh(<8 x i16> %{{.+}}, i32 + // CHECK-NEXT: ret 
i64 + return vec_cntm(vusa, 0); +} + +unsigned long long test_vec_cntm_ui(void) { + // CHECK: @llvm.ppc.altivec.vcntmbw(<4 x i32> %{{.+}}, i32 + // CHECK-NEXT: ret i64 + return vec_cntm(vuia, 1); +} + +unsigned long long test_vec_cntm_ull(void) { + // CHECK: @llvm.ppc.altivec.vcntmbd(<2 x i64> %{{.+}}, i32 + // CHECK-NEXT: ret i64 + return vec_cntm(vulla, 0); +} + unsigned long long test_vgnb_1(void) { // CHECK: @llvm.ppc.altivec.vgnb(<1 x i128> %{{.+}}, i32 2) // CHECK-NEXT: ret i64 diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -467,6 +467,20 @@ def int_ppc_altivec_vexpandqm : GCCBuiltin<"__builtin_altivec_vexpandqm">, Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty], [IntrNoMem]>; + // P10 Vector Count with Mask intrinsics. + def int_ppc_altivec_vcntmbb : GCCBuiltin<"__builtin_altivec_vcntmbb">, + Intrinsic<[llvm_i64_ty], [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + def int_ppc_altivec_vcntmbh : GCCBuiltin<"__builtin_altivec_vcntmbh">, + Intrinsic<[llvm_i64_ty], [llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + def int_ppc_altivec_vcntmbw : GCCBuiltin<"__builtin_altivec_vcntmbw">, + Intrinsic<[llvm_i64_ty], [llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + def int_ppc_altivec_vcntmbd : GCCBuiltin<"__builtin_altivec_vcntmbd">, + Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + // P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins. 
def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -1046,19 +1046,23 @@ def VCNTMBB : VXForm_RD5_MP_VB5<1602, 12, (outs g8rc:$rD), (ins vrrc:$vB, u1imm:$MP), "vcntmbb $rD, $vB, $MP", IIC_VecGeneral, - []>; + [(set i64:$rD, (int_ppc_altivec_vcntmbb + v16i8:$vB, timm:$MP))]>; def VCNTMBH : VXForm_RD5_MP_VB5<1602, 13, (outs g8rc:$rD), (ins vrrc:$vB, u1imm:$MP), "vcntmbh $rD, $vB, $MP", IIC_VecGeneral, - []>; + [(set i64:$rD, (int_ppc_altivec_vcntmbh + v8i16:$vB, timm:$MP))]>; def VCNTMBW : VXForm_RD5_MP_VB5<1602, 14, (outs g8rc:$rD), (ins vrrc:$vB, u1imm:$MP), "vcntmbw $rD, $vB, $MP", IIC_VecGeneral, - []>; + [(set i64:$rD, (int_ppc_altivec_vcntmbw + v4i32:$vB, timm:$MP))]>; def VCNTMBD : VXForm_RD5_MP_VB5<1602, 15, (outs g8rc:$rD), (ins vrrc:$vB, u1imm:$MP), "vcntmbd $rD, $vB, $MP", IIC_VecGeneral, - []>; + [(set i64:$rD, (int_ppc_altivec_vcntmbd + v2i64:$vB, timm:$MP))]>; def VEXTDUBVLX : VAForm_1a<24, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, gprc:$rC), "vextdubvlx $vD, $vA, $vB, $rC", diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll --- a/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll +++ b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll @@ -120,3 +120,48 @@ %exp = tail call <1 x i128> @llvm.ppc.altivec.vexpandqm(<1 x i128> %a) ret <1 x i128> %exp } + +declare i64 @llvm.ppc.altivec.vcntmbb(<16 x i8>, i32) +declare i64 @llvm.ppc.altivec.vcntmbh(<8 x i16>, i32) +declare i64 @llvm.ppc.altivec.vcntmbw(<4 x i32>, i32) +declare i64 @llvm.ppc.altivec.vcntmbd(<2 x i64>, i32) + +define i64 @test_vcntmbb(<16 x i8> %a) { +; CHECK-LABEL: test_vcntmbb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vcntmbb r3, v2, 1 +; CHECK-NEXT: blr +entry: + 
%cnt = tail call i64 @llvm.ppc.altivec.vcntmbb(<16 x i8> %a, i32 1) + ret i64 %cnt +} + +define i64 @test_vcntmbh(<8 x i16> %a) { +; CHECK-LABEL: test_vcntmbh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vcntmbh r3, v2, 0 +; CHECK-NEXT: blr +entry: + %cnt = tail call i64 @llvm.ppc.altivec.vcntmbh(<8 x i16> %a, i32 0) + ret i64 %cnt +} + +define i64 @test_vcntmbw(<4 x i32> %a) { +; CHECK-LABEL: test_vcntmbw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vcntmbw r3, v2, 1 +; CHECK-NEXT: blr +entry: + %cnt = tail call i64 @llvm.ppc.altivec.vcntmbw(<4 x i32> %a, i32 1) + ret i64 %cnt +} + +define i64 @test_vcntmbd(<2 x i64> %a) { +; CHECK-LABEL: test_vcntmbd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vcntmbd r3, v2, 0 +; CHECK-NEXT: blr +entry: + %cnt = tail call i64 @llvm.ppc.altivec.vcntmbd(<2 x i64> %a, i32 0) + ret i64 %cnt +}