diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -302,6 +302,10 @@ BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "") BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "") +// P10 Vector Count Leading / Trailing Zeroes under bit Mask built-ins. +BUILTIN(__builtin_altivec_vclzdm, "V2ULLiV2ULLiV2ULLi", "") +BUILTIN(__builtin_altivec_vctzdm, "V2ULLiV2ULLiV2ULLi", "") + // VSX built-ins. BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "") @@ -476,6 +480,8 @@ BUILTIN(__builtin_bpermd, "SLLiSLLiSLLi", "") BUILTIN(__builtin_pdepd, "ULLiULLiULLi", "") BUILTIN(__builtin_pextd, "ULLiULLiULLi", "") +BUILTIN(__builtin_cntlzdm, "ULLiULLiULLi", "") +BUILTIN(__builtin_cnttzdm, "ULLiULLiULLi", "") // Vector int128 (un)pack BUILTIN(__builtin_unpack_vector_int128, "ULLiV1LLLii", "") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -13962,6 +13962,24 @@ Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); return Builder.CreateCall(F, {X, Undef}); } + case PPC::BI__builtin_altivec_vclzdm: + case PPC::BI__builtin_altivec_vctzdm: + case PPC::BI__builtin_cntlzdm: + case PPC::BI__builtin_cnttzdm: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *A = EmitScalarExpr(E->getArg(0)); + Value *B = EmitScalarExpr(E->getArg(1)); + Value *AndAB = Builder.CreateAnd(A, B); + Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); + if (BuiltinID == PPC::BI__builtin_altivec_vclzdm || + BuiltinID == PPC::BI__builtin_cntlzdm) + ID = Intrinsic::ctlz; + else if (BuiltinID == PPC::BI__builtin_altivec_vctzdm || + BuiltinID == PPC::BI__builtin_cnttzdm) + ID = Intrinsic::cttz; + Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, {AndAB, Undef}); + } case PPC::BI__builtin_altivec_vpopcntb: case PPC::BI__builtin_altivec_vpopcnth: case PPC::BI__builtin_altivec_vpopcntw: diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -16776,6 +16776,21 @@ vec_pext(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vpextd(__a, __b); } + +/* vec_cntlzm */ + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_cntlzm(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_altivec_vclzdm(__a, __b); +} + +/* vec_cnttzm */ + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_cnttzm(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_altivec_vctzdm(__a, __b); +} + #endif /* __POWER10_VECTOR__ */ #undef __ATTRS_o_ai diff --git a/clang/test/CodeGen/builtins-ppc-p10.c b/clang/test/CodeGen/builtins-ppc-p10.c --- a/clang/test/CodeGen/builtins-ppc-p10.c +++ b/clang/test/CodeGen/builtins-ppc-p10.c @@ -13,3 +13,17 @@ // CHECK: @llvm.ppc.pextd return __builtin_pextd(ulla, ullb); } + +unsigned long long test_cntlzdm(void) { + // CHECK: and i64 %{{.+}}, %{{.+}} + // CHECK-NEXT: @llvm.ctlz.i64(i64 + // CHECK-NEXT: ret i64 + return __builtin_cntlzdm(ulla, ullb); +} + +unsigned long long test_cnttzdm(void) { + // CHECK: and i64 %{{.+}}, %{{.+}} + // CHECK-NEXT: @llvm.cttz.i64(i64 + // CHECK-NEXT: ret i64 + return __builtin_cnttzdm(ulla, ullb); +} diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c --- a/clang/test/CodeGen/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -18,3 +18,17 @@ // CHECK-NEXT: ret <2 x i64> return vec_pext(vulla, vullb); } + +vector unsigned long long test_vclzdm(void) { + // CHECK: and <2 x i64> %{{.+}}, %{{.+}} + // CHECK-NEXT: @llvm.ctlz.v2i64(<2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_cntlzm(vulla, vullb); +} + +vector unsigned long long test_vctzdm(void) { + // CHECK: and <2 x i64> %{{.+}}, %{{.+}} + // CHECK-NEXT: @llvm.cttz.v2i64(<2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_cnttzm(vulla, vullb); +} diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -516,4 +516,18 @@ def PEXTD : XForm_6<31, 188, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), "pextd $rA, $rS, $rB", IIC_IntGeneral, [(set i64:$rA, (int_ppc_pextd i64:$rS, i64:$rB))]>; + def VCLZDM : VXForm_1<1924, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vclzdm $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (ctlz (v2i64 (bitconvert + (and v4i32:$vA, v4i32:$vB)))))]>; + def VCTZDM : VXForm_1<1988, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vctzdm $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (cttz (v2i64 (bitconvert + (and v4i32:$vA, v4i32:$vB)))))]>; + def CNTLZDM : XForm_6<31, 59, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "cntlzdm $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (ctlz (and i64:$rS, i64:$rB)))]>; + def CNTTZDM : XForm_6<31, 571, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "cnttzdm $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (cttz (and i64:$rS, i64:$rB)))]>; } diff --git a/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll b/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll --- a/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll +++ b/llvm/test/CodeGen/PowerPC/p10-bit-manip-ops.ll @@ -10,6 +10,12 @@ declare i64 @llvm.ppc.pdepd(i64, i64) declare i64 @llvm.ppc.pextd(i64, i64) +; Count leading/trailing zero with mask builtins lowered to use the following: +declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1 immarg) +declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1 immarg) +declare i64 @llvm.ctlz.i64(i64, i1 immarg) +declare i64 @llvm.cttz.i64(i64, i1 immarg) + define <2 x i64> @test_vpdepd(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vpdepd: ; CHECK: # %bb.0: # %entry @@ -49,3 +55,47 @@ %tmp = tail call i64 @llvm.ppc.pextd(i64 %a, i64 %b) ret i64 %tmp } + +define <2 x i64> @test_vclzdm(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vclzdm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclzdm v2, v2, v3 +; CHECK-NEXT: blr +entry: + %0 = and <2 x i64> %a, %b + %1 = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %0, i1 false) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vctzdm(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vctzdm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vctzdm v2, v2, v3 +; CHECK-NEXT: blr +entry: + %0 = and <2 x i64> %a, %b + %1 = tail call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %0, i1 false) + ret <2 x i64> %1 +} + +define i64 @test_cntlzdm(i64 %a, i64 %b) { +; CHECK-LABEL: test_cntlzdm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cntlzdm r3, r3, r4 +; CHECK-NEXT: blr +entry: + %0 = and i64 %a, %b + %1 = tail call i64 @llvm.ctlz.i64(i64 %0, i1 false) + ret i64 %1 +} + +define i64 @test_cnttzdm(i64 %a, i64 %b) { +; CHECK-LABEL: test_cnttzdm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cnttzdm r3, r3, r4 +; CHECK-NEXT: blr +entry: + %0 = and i64 %a, %b + %1 = tail call i64 @llvm.cttz.i64(i64 %0, i1 false) + ret i64 %1 +} diff --git a/llvm/test/MC/Disassembler/PowerPC/p10insts.txt b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt --- a/llvm/test/MC/Disassembler/PowerPC/p10insts.txt +++ b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt @@ -12,3 +12,15 @@ # CHECK: pextd 1, 2, 4 0x7c 0x41 0x21 0x78 + +# CHECK: vclzdm 1, 2, 3 +0x10 0x22 0x1f 0x84 + +# CHECK: vctzdm 1, 2, 3 +0x10 0x22 0x1f 0xc4 + +# CHECK: cntlzdm 1, 3, 2 +0x7c 0x61 0x10 0x76 + +# CHECK: cnttzdm 1, 3, 2 +0x7c 0x61 0x14 0x76 diff --git a/llvm/test/MC/PowerPC/p10.s b/llvm/test/MC/PowerPC/p10.s --- a/llvm/test/MC/PowerPC/p10.s +++ b/llvm/test/MC/PowerPC/p10.s @@ -15,3 +15,15 @@ # CHECK-BE: pextd 1, 2, 4 # encoding: [0x7c,0x41,0x21,0x78] # CHECK-LE: pextd 1, 2, 4 # encoding: [0x78,0x21,0x41,0x7c] pextd 1, 2, 4 +# CHECK-BE: vclzdm 1, 2, 3 # encoding: [0x10,0x22,0x1f,0x84] +# CHECK-LE: vclzdm 1, 2, 3 # encoding: [0x84,0x1f,0x22,0x10] + vclzdm 1, 2, 3 +# CHECK-BE: vctzdm 1, 2, 3 # encoding: [0x10,0x22,0x1f,0xc4] +# CHECK-LE: vctzdm 1, 2, 3 # encoding: [0xc4,0x1f,0x22,0x10] + vctzdm 1, 2, 3 +# CHECK-BE: cntlzdm 1, 3, 2 # encoding: [0x7c,0x61,0x10,0x76] +# CHECK-LE: cntlzdm 1, 3, 2 # encoding: [0x76,0x10,0x61,0x7c] + cntlzdm 1, 3, 2 +# CHECK-BE: cnttzdm 1, 3, 2 # encoding: [0x7c,0x61,0x14,0x76] +# CHECK-LE: cnttzdm 1, 3, 2 # encoding: [0x76,0x14,0x61,0x7c] + cnttzdm 1, 3, 2