[PowerPC] Add the llvm.ppc.popcntb intrinsic and select it to POPCNTB.

  * IntrinsicsPowerPC.td: declare int_ppc_popcntb (i64 -> i64, IntrNoMem).
  * PPCInstr64Bit.td: give POPCNTB g8rc operands and an i64 selection pattern.
  * popcnt.ll: cover the intrinsic with a direct i64 use and a zext/trunc use.

NOTE(review): the line structure and indentation of this patch were
reconstructed. Runs of whitespace inside context and removed lines may not
match the tree exactly -- if a hunk is rejected, retry with
`git apply --ignore-whitespace` or regenerate the patch.

diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -31,6 +31,10 @@
   def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], []>;
   def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
 
+  // Population count in each byte (popcntb). Takes and returns an i64 and is
+  // marked IntrNoMem.
+  def int_ppc_popcntb : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
+
   // sync instruction (i.e. sync 0, a.k.a hwsync)
   def int_ppc_sync : Intrinsic<[], [], []>;
   // lwsync is sync 1
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -786,8 +786,11 @@
                        "popcntw $rA, $rS", IIC_IntGeneral,
                        [(set i32:$rA, (ctpop i32:$rS))]>;
 
-def POPCNTB : XForm_11<31, 122, (outs gprc:$rA), (ins gprc:$rS),
-                       "popcntb $rA, $rS", IIC_IntGeneral, []>;
+// popcntb is selected from int_ppc_popcntb; both operands use g8rc to match
+// the i64 type of that intrinsic.
+def POPCNTB : XForm_11<31, 122, (outs g8rc:$rA), (ins g8rc:$rS),
+                       "popcntb $rA, $rS", IIC_IntGeneral,
+                       [(set i64:$rA, (int_ppc_popcntb i64:$rS))]>;
 
 defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
                         "divd", "$rT, $rA, $rB", IIC_IntDivD,
diff --git a/llvm/test/CodeGen/PowerPC/popcnt.ll b/llvm/test/CodeGen/PowerPC/popcnt.ll
--- a/llvm/test/CodeGen/PowerPC/popcnt.ll
+++ b/llvm/test/CodeGen/PowerPC/popcnt.ll
@@ -4,6 +4,26 @@
 ; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=a2q < %s | FileCheck %s --check-prefix=SLOWPC
 ; RUN: llc -verify-machineinstrs -mtriple=ppc64-- -mcpu=a2q -mattr=+popcntd < %s | FileCheck %s
 
+; llvm.ppc.popcntb applied directly to an i64 argument must emit popcntb.
+define i64 @_cntb64(i64 %x) nounwind readnone {
+  %bytecnt = tail call i64 @llvm.ppc.popcntb(i64 %x)
+  ret i64 %bytecnt
+; CHECK-LABEL: @_cntb64
+; CHECK: popcntb
+; CHECK: blr
+}
+
+; Same intrinsic, fed a zero-extended i32 and truncated back to i32 afterwards.
+define i32 @_cntb32(i32 %x) nounwind readnone {
+  %wide = zext i32 %x to i64
+  %bytecnt = tail call i64 @llvm.ppc.popcntb(i64 %wide)
+  %narrow = trunc i64 %bytecnt to i32
+  ret i32 %narrow
+; CHECK-LABEL: @_cntb32
+; CHECK: popcntb
+; CHECK: blr
+}
+
 define i8 @cnt8(i8 %x) nounwind readnone {
   %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
   ret i8 %cnt
@@ -54,3 +74,4 @@
 declare i16 @llvm.ctpop.i16(i16) nounwind readnone
 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
 declare i64 @llvm.ctpop.i64(i64) nounwind readnone
+declare i64 @llvm.ppc.popcntb(i64) nounwind readnone