Index: lib/Target/PowerPC/PPC.td =================================================================== --- lib/Target/PowerPC/PPC.td +++ lib/Target/PowerPC/PPC.td @@ -109,9 +109,12 @@ def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true", "Enable VSX instructions", [FeatureAltivec]>; +def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true", + "Enable POWER8 Altivec instructions", + [FeatureAltivec]>; def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true", "Enable POWER8 vector instructions", - [FeatureVSX, FeatureAltivec]>; + [FeatureVSX, FeatureP8Altivec]>; def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", "Treat mftb as deprecated">; Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -401,6 +401,14 @@ setOperationAction(ISD::ADD , VT, Legal); setOperationAction(ISD::SUB , VT, Legal); + // Vector popcnt instructions were added in P8; expand CTPOP otherwise. + if (Subtarget.hasP8Altivec()) { + setOperationAction(ISD::CTPOP, VT, Legal); + } + else { + setOperationAction(ISD::CTPOP, VT, Expand); + } + // We promote all shuffles to v16i8.
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); @@ -455,7 +463,6 @@ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::BSWAP, VT, Expand); - setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); @@ -593,6 +600,10 @@ addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); } + + if (Subtarget.hasP8Altivec()) { + addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); + } } if (Subtarget.has64BitSupport()) Index: lib/Target/PowerPC/PPCInstrAltivec.td =================================================================== --- lib/Target/PowerPC/PPCInstrAltivec.td +++ lib/Target/PowerPC/PPCInstrAltivec.td @@ -774,6 +774,21 @@ [(set v4i32:$vD, (v4i32 immAllOnesV))]>; } } + +// Vector Population Count: per-element ctpop for v16i8/v8i16/v4i32/v2i64. +def VPOPCNTB : VXForm_2<1795, (outs vrrc:$vD), (ins vrrc:$vB), + "vpopcntb $vD, $vB", IIC_VecGeneral, + [(set v16i8:$vD, (ctpop v16i8:$vB))]>; +def VPOPCNTH : VXForm_2<1859, (outs vrrc:$vD), (ins vrrc:$vB), + "vpopcnth $vD, $vB", IIC_VecGeneral, + [(set v8i16:$vD, (ctpop v8i16:$vB))]>; +def VPOPCNTW : VXForm_2<1923, (outs vrrc:$vD), (ins vrrc:$vB), + "vpopcntw $vD, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (ctpop v4i32:$vB))]>; +def VPOPCNTD : VXForm_2<1987, (outs vrrc:$vD), (ins vrrc:$vB), + "vpopcntd $vD, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (ctpop v2i64:$vB))]>; + } // VALU Operations.
//===----------------------------------------------------------------------===// @@ -791,18 +806,27 @@ def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>; def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>; def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>; +def : Pat<(v16i8 (bitconvert (v2i64 VRRC:$src))), (v16i8 VRRC:$src)>; def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>; def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>; def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>; +def : Pat<(v8i16 (bitconvert (v2i64 VRRC:$src))), (v8i16 VRRC:$src)>; def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>; def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>; def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>; +def : Pat<(v4i32 (bitconvert (v2i64 VRRC:$src))), (v4i32 VRRC:$src)>; def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>; def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>; def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>; +def : Pat<(v4f32 (bitconvert (v2i64 VRRC:$src))), (v4f32 VRRC:$src)>; + +def : Pat<(v2i64 (bitconvert (v16i8 VRRC:$src))), (v2i64 VRRC:$src)>; +def : Pat<(v2i64 (bitconvert (v8i16 VRRC:$src))), (v2i64 VRRC:$src)>; +def : Pat<(v2i64 (bitconvert (v4i32 VRRC:$src))), (v2i64 VRRC:$src)>; +def : Pat<(v2i64 (bitconvert (v4f32 VRRC:$src))), (v2i64 VRRC:$src)>; // Shuffles.
Index: lib/Target/PowerPC/PPCRegisterInfo.td =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.td +++ lib/Target/PowerPC/PPCRegisterInfo.td @@ -242,7 +242,7 @@ (sequence "F%u", 31, 14))>; def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>; -def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128, +def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v4f32], 128, (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30, V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>; Index: lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- lib/Target/PowerPC/PPCSubtarget.h +++ lib/Target/PowerPC/PPCSubtarget.h @@ -92,6 +92,7 @@ bool HasQPX; bool HasVSX; bool HasP8Vector; + bool HasP8Altivec; bool HasFCPSGN; bool HasFSQRT; bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; @@ -219,6 +220,7 @@ bool hasQPX() const { return HasQPX; } bool hasVSX() const { return HasVSX; } bool hasP8Vector() const { return HasP8Vector; } + bool hasP8Altivec() const { return HasP8Altivec; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } Index: lib/Target/PowerPC/PPCSubtarget.cpp =================================================================== --- lib/Target/PowerPC/PPCSubtarget.cpp +++ lib/Target/PowerPC/PPCSubtarget.cpp @@ -99,6 +99,7 @@ HasQPX = false; HasVSX = false; HasP8Vector = false; + HasP8Altivec = false; HasFCPSGN = false; HasFSQRT = false; HasFRE = false; Index: test/CodeGen/PowerPC/vec_popcnt.ll =================================================================== --- test/CodeGen/PowerPC/vec_popcnt.ll +++ test/CodeGen/PowerPC/vec_popcnt.ll @@ -0,0 +1,72 @@ +; Check the vpopcnt* instructions that were added in P8 +; In addition, check the conversions to/from the v2i64 VMX register type that was also added in P8.
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s + +declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone +declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone +declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone +declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone + +define <16 x i8> @test_v16i8_v2i64(<2 x i64> %x) nounwind readnone { + %tmp = bitcast <2 x i64> %x to <16 x i8> + %vcnt = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp) + ret <16 x i8> %vcnt +; CHECK-LABEL: @test_v16i8_v2i64 +; CHECK: vpopcntb +; CHECK: blr +} + +define <8 x i16> @test_v8i16_v2i64(<2 x i64> %x) nounwind readnone { + %tmp = bitcast <2 x i64> %x to <8 x i16> + %vcnt = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %tmp) + ret <8 x i16> %vcnt +; CHECK-LABEL: @test_v8i16_v2i64 +; CHECK: vpopcnth +; CHECK: blr +} + +define <4 x i32> @test_v4i32_v2i64(<2 x i64> %x) nounwind readnone { + %tmp = bitcast <2 x i64> %x to <4 x i32> + %vcnt = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %tmp) + ret <4 x i32> %vcnt +; CHECK-LABEL: @test_v4i32_v2i64 +; CHECK: vpopcntw +; CHECK: blr +} + +define <2 x i64> @test_v2i64_v2i64(<2 x i64> %x) nounwind readnone { + %vcnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x) + ret <2 x i64> %vcnt +; CHECK-LABEL: @test_v2i64_v2i64 +; CHECK: vpopcntd +; CHECK: blr +} + +define <2 x i64> @test_v2i64_v4i32(<4 x i32> %x) nounwind readnone { + %tmp = bitcast <4 x i32> %x to <2 x i64> + %vcnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %tmp) + ret <2 x i64> %vcnt +; CHECK-LABEL: @test_v2i64_v4i32 +; CHECK: vpopcntd +; CHECK: blr +} + + +define <2 x i64> @test_v2i64_v8i16(<8 x i16> %x) nounwind readnone { + %tmp = bitcast <8 x i16> %x to <2 x i64> + %vcnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %tmp) + ret <2 x i64> %vcnt +; CHECK-LABEL: @test_v2i64_v8i16 +; CHECK: vpopcntd +; CHECK: blr +} + +define <2 
x i64> @test_v2i64_v16i8(<16 x i8> %x) nounwind readnone { + %tmp = bitcast <16 x i8> %x to <2 x i64> + %vcnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %tmp) + ret <2 x i64> %vcnt +; CHECK-LABEL: @test_v2i64_v16i8 +; CHECK: vpopcntd +; CHECK: blr +} Index: test/MC/PowerPC/ppc64-encoding-vmx.s =================================================================== --- test/MC/PowerPC/ppc64-encoding-vmx.s +++ test/MC/PowerPC/ppc64-encoding-vmx.s @@ -543,6 +543,24 @@ # CHECK-LE: vrsqrtefp 2, 3 # encoding: [0x4a,0x19,0x40,0x10] vrsqrtefp 2, 3 +# Vector population count instructions +# CHECK-BE: vpopcntb 2, 3 # encoding: [0x10,0x40,0x1f,0x03] +# CHECK-LE: vpopcntb 2, 3 # encoding: [0x03,0x1f,0x40,0x10] + vpopcntb 2, 3 + +# CHECK-BE: vpopcnth 2, 3 # encoding: [0x10,0x40,0x1f,0x43] +# CHECK-LE: vpopcnth 2, 3 # encoding: [0x43,0x1f,0x40,0x10] + vpopcnth 2, 3 + +# vpopcntw: XO 1923 = 0x783, so with vB=3 the low halfword is 0x1f83. +# CHECK-BE: vpopcntw 2, 3 # encoding: [0x10,0x40,0x1f,0x83] +# CHECK-LE: vpopcntw 2, 3 # encoding: [0x83,0x1f,0x40,0x10] + vpopcntw 2, 3 + +# CHECK-BE: vpopcntd 2, 3 # encoding: [0x10,0x40,0x1f,0xc3] +# CHECK-LE: vpopcntd 2, 3 # encoding: [0xc3,0x1f,0x40,0x10] + vpopcntd 2, 3 + # Vector status and control register instructions # CHECK-BE: mtvscr 2 # encoding: [0x10,0x00,0x16,0x44]