Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -401,11 +401,15 @@ setOperationAction(ISD::ADD , VT, Legal); setOperationAction(ISD::SUB , VT, Legal); - // Vector popcnt instructions introduced in P8 - if (Subtarget.hasP8Altivec()) + // Vector instructions introduced in P8 + if (Subtarget.hasP8Altivec()) { setOperationAction(ISD::CTPOP, VT, Legal); - else + setOperationAction(ISD::CTLZ, VT, Legal); + } + else { setOperationAction(ISD::CTPOP, VT, Expand); + setOperationAction(ISD::CTLZ, VT, Expand); + } // We promote all shuffles to v16i8. setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); @@ -461,7 +465,6 @@ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::BSWAP, VT, Expand); - setOperationAction(ISD::CTLZ, VT, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); Index: lib/Target/PowerPC/PPCInstrAltivec.td =================================================================== --- lib/Target/PowerPC/PPCInstrAltivec.td +++ lib/Target/PowerPC/PPCInstrAltivec.td @@ -940,6 +940,21 @@ def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">; let Predicates = [HasP8Altivec] in { + +// Count Leading Zeros +def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB), + "vclzb $vD, $vB", IIC_VecGeneral, + [(set v16i8:$vD, (ctlz v16i8:$vB))]>; +def VCLZH : VXForm_2<1858, (outs vrrc:$vD), (ins vrrc:$vB), + "vclzh $vD, $vB", IIC_VecGeneral, + [(set v8i16:$vD, (ctlz v8i16:$vB))]>; +def VCLZW : VXForm_2<1922, (outs vrrc:$vD), (ins vrrc:$vB), + "vclzw $vD, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (ctlz v4i32:$vB))]>; +def VCLZD : VXForm_2<1986, (outs vrrc:$vD), (ins vrrc:$vB), + "vclzd $vD, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (ctlz 
v2i64:$vB))]>; + // Population Count def VPOPCNTB : VXForm_2<1795, (outs vrrc:$vD), (ins vrrc:$vB), "vpopcntb $vD, $vB", IIC_VecGeneral, Index: test/CodeGen/PowerPC/vec_clz.ll =================================================================== --- test/CodeGen/PowerPC/vec_clz.ll +++ test/CodeGen/PowerPC/vec_clz.ll @@ -0,0 +1,40 @@ +; Check the vclz* instructions that were added in P8 +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s + +declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>) nounwind readnone +declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>) nounwind readnone +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>) nounwind readnone +declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>) nounwind readnone + +define <16 x i8> @test_v16i8(<16 x i8> %x) nounwind readnone { + %vcnt = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %x) + ret <16 x i8> %vcnt +; CHECK: @test_v16i8 +; CHECK: vclzb 2, 2 +; CHECK: blr +} + +define <8 x i16> @test_v8i16(<8 x i16> %x) nounwind readnone { + %vcnt = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %x) + ret <8 x i16> %vcnt +; CHECK: @test_v8i16 +; CHECK: vclzh 2, 2 +; CHECK: blr +} + +define <4 x i32> @test_v4i32(<4 x i32> %x) nounwind readnone { + %vcnt = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x) + ret <4 x i32> %vcnt +; CHECK: @test_v4i32 +; CHECK: vclzw 2, 2 +; CHECK: blr +} + +define <2 x i64> @test_v2i64(<2 x i64> %x) nounwind readnone { + %vcnt = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %x) + ret <2 x i64> %vcnt +; CHECK: @test_v2i64 +; CHECK: vclzd 2, 2 +; CHECK: blr +} Index: test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt =================================================================== --- test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt +++ test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt @@ -501,6 +501,18 @@ # CHECK: vrsqrtefp 2, 3 0x10 0x40 0x19 0x4a +# CHECK: vclzb 2, 3 +0x10 0x40 0x1f 
0x02 + +# CHECK: vclzh 2, 3 +0x10 0x40 0x1f 0x42 + +# CHECK: vclzw 2, 3 +0x10 0x40 0x1f 0x82 + +# CHECK: vclzd 2, 3 +0x10 0x40 0x1f 0xc2 + # CHECK: vpopcntb 2, 3 0x10 0x40 0x1f 0x03 Index: test/MC/PowerPC/ppc64-encoding-vmx.s =================================================================== --- test/MC/PowerPC/ppc64-encoding-vmx.s +++ test/MC/PowerPC/ppc64-encoding-vmx.s @@ -543,6 +543,23 @@ # CHECK-LE: vrsqrtefp 2, 3 # encoding: [0x4a,0x19,0x40,0x10] vrsqrtefp 2, 3 +# Vector count leading zero instructions +# CHECK-BE: vclzb 2, 3 # encoding: [0x10,0x40,0x1f,0x02] +# CHECK-LE: vclzb 2, 3 # encoding: [0x02,0x1f,0x40,0x10] + vclzb 2, 3 + +# CHECK-BE: vclzh 2, 3 # encoding: [0x10,0x40,0x1f,0x42] +# CHECK-LE: vclzh 2, 3 # encoding: [0x42,0x1f,0x40,0x10] + vclzh 2, 3 + +# CHECK-BE: vclzw 2, 3 # encoding: [0x10,0x40,0x1f,0x82] +# CHECK-LE: vclzw 2, 3 # encoding: [0x82,0x1f,0x40,0x10] + vclzw 2, 3 + +# CHECK-BE: vclzd 2, 3 # encoding: [0x10,0x40,0x1f,0xc2] +# CHECK-LE: vclzd 2, 3 # encoding: [0xc2,0x1f,0x40,0x10] + vclzd 2, 3 + # Vector population count instructions # CHECK-BE: vpopcntb 2, 3 # encoding: [0x10,0x40,0x1f,0x03] # CHECK-LE: vpopcntb 2, 3 # encoding: [0x03,0x1f,0x40,0x10]