Add POWER8 Altivec vector population-count support to the PowerPC backend.

  * PPC.td: new "power8-altivec" subtarget feature (FeatureP8Altivec), implied
    by "power8-vector" and enabled for the pwr8 processor model.
  * PPCISelLowering.cpp: ISD::CTPOP on vector types is Legal when
    hasP8Altivec() and Expand otherwise; with hasP8Altivec(), v2i64 is
    registered with the VRRC register class.
  * PPCRegisterInfo.td / PPCInstrAltivec.td: v2i64 joins the VRRC value types,
    bitconvert patterns are added between v2i64 and the other VRRC types, and
    VPOPCNTB/H/W/D are defined under the HasP8Altivec predicate.
  * Tests: CodeGen selection of vpopcnt{b,h,w,d}, plus MC assembler and
    disassembler encodings for all four instructions.

Index: lib/Target/PowerPC/PPC.td
===================================================================
--- lib/Target/PowerPC/PPC.td
+++ lib/Target/PowerPC/PPC.td
@@ -109,9 +109,12 @@
 def FeatureVSX       : SubtargetFeature<"vsx","HasVSX", "true",
                                         "Enable VSX instructions",
                                         [FeatureAltivec]>;
+def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
+                                        "Enable POWER8 Altivec instructions",
+                                        [FeatureAltivec]>;
 def FeatureP8Vector  : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
                                         "Enable POWER8 vector instructions",
-                                        [FeatureVSX, FeatureAltivec]>;
+                                        [FeatureVSX, FeatureP8Altivec]>;
 
 def DeprecatedMFTB   : SubtargetFeature<"", "DeprecatedMFTB", "true",
                                         "Treat mftb as deprecated">;
@@ -316,9 +319,9 @@
                    Feature64Bit /*, Feature64BitRegs */,
                    DeprecatedMFTB, DeprecatedDST]>;
 def : ProcessorModel<"pwr8", P8Model,
-                  [DirectivePwr8, FeatureAltivec, FeatureVSX, FeatureP8Vector,
-                   FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
-                   FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+                  [DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX,
+                   FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt,
+                   FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
                    FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
                    FeatureFPRND, FeatureFPCVT, FeatureISEL,
                    FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -401,6 +401,12 @@
       setOperationAction(ISD::ADD , VT, Legal);
       setOperationAction(ISD::SUB , VT, Legal);
 
+      // Vector popcnt instructions introduced in P8
+      if (Subtarget.hasP8Altivec())
+        setOperationAction(ISD::CTPOP, VT, Legal);
+      else
+        setOperationAction(ISD::CTPOP, VT, Expand);
+
       // We promote all shuffles to v16i8.
       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
@@ -455,7 +461,6 @@
       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
       setOperationAction(ISD::FPOW, VT, Expand);
       setOperationAction(ISD::BSWAP, VT, Expand);
-      setOperationAction(ISD::CTPOP, VT, Expand);
       setOperationAction(ISD::CTLZ, VT, Expand);
       setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
       setOperationAction(ISD::CTTZ, VT, Expand);
@@ -593,6 +598,9 @@
 
       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
     }
+
+    if (Subtarget.hasP8Altivec())
+      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
   }
 
   if (Subtarget.has64BitSupport())
Index: lib/Target/PowerPC/PPCInstrAltivec.td
===================================================================
--- lib/Target/PowerPC/PPCInstrAltivec.td
+++ lib/Target/PowerPC/PPCInstrAltivec.td
@@ -791,18 +791,27 @@
 def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
 def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
 def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VRRC:$src))), (v16i8 VRRC:$src)>;
 
 def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
 def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
 def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VRRC:$src))), (v8i16 VRRC:$src)>;
 
 def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
 def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
 def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VRRC:$src))), (v4i32 VRRC:$src)>;
 
 def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
 def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
 def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VRRC:$src))), (v4f32 VRRC:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VRRC:$src))), (v2i64 VRRC:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VRRC:$src))), (v2i64 VRRC:$src)>;
 
 // Shuffles.
 
@@ -929,3 +938,20 @@
 
 } // end HasAltivec
 
+def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
+let Predicates = [HasP8Altivec] in {
+// Population Count
+def VPOPCNTB : VXForm_2<1795, (outs vrrc:$vD), (ins vrrc:$vB),
+                        "vpopcntb $vD, $vB", IIC_VecGeneral,
+                        [(set v16i8:$vD, (ctpop v16i8:$vB))]>;
+def VPOPCNTH : VXForm_2<1859, (outs vrrc:$vD), (ins vrrc:$vB),
+                        "vpopcnth $vD, $vB", IIC_VecGeneral,
+                        [(set v8i16:$vD, (ctpop v8i16:$vB))]>;
+def VPOPCNTW : VXForm_2<1923, (outs vrrc:$vD), (ins vrrc:$vB),
+                        "vpopcntw $vD, $vB", IIC_VecGeneral,
+                        [(set v4i32:$vD, (ctpop v4i32:$vB))]>;
+def VPOPCNTD : VXForm_2<1987, (outs vrrc:$vD), (ins vrrc:$vB),
+                        "vpopcntd $vD, $vB", IIC_VecGeneral,
+                        [(set v2i64:$vD, (ctpop v2i64:$vB))]>;
+
+} // end HasP8Altivec
Index: lib/Target/PowerPC/PPCRegisterInfo.td
===================================================================
--- lib/Target/PowerPC/PPCRegisterInfo.td
+++ lib/Target/PowerPC/PPCRegisterInfo.td
@@ -242,7 +242,7 @@
                                                 (sequence "F%u", 31, 14))>;
 def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
 
-def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v4f32], 128,
                          (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
                              V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
                              V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
Index: lib/Target/PowerPC/PPCSubtarget.h
===================================================================
--- lib/Target/PowerPC/PPCSubtarget.h
+++ lib/Target/PowerPC/PPCSubtarget.h
@@ -92,6 +92,7 @@
   bool HasQPX;
   bool HasVSX;
   bool HasP8Vector;
+  bool HasP8Altivec;
   bool HasFCPSGN;
   bool HasFSQRT;
   bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -219,6 +220,7 @@
   bool hasQPX() const { return HasQPX; }
   bool hasVSX() const { return HasVSX; }
   bool hasP8Vector() const { return HasP8Vector; }
+  bool hasP8Altivec() const { return HasP8Altivec; }
   bool hasMFOCRF() const { return HasMFOCRF; }
   bool hasISEL() const { return HasISEL; }
   bool hasPOPCNTD() const { return HasPOPCNTD; }
Index: lib/Target/PowerPC/PPCSubtarget.cpp
===================================================================
--- lib/Target/PowerPC/PPCSubtarget.cpp
+++ lib/Target/PowerPC/PPCSubtarget.cpp
@@ -99,6 +99,7 @@
   HasQPX = false;
   HasVSX = false;
   HasP8Vector = false;
+  HasP8Altivec = false;
   HasFCPSGN = false;
   HasFSQRT = false;
   HasFRE = false;
Index: test/CodeGen/PowerPC/vec_popcnt.ll
===================================================================
--- test/CodeGen/PowerPC/vec_popcnt.ll
+++ test/CodeGen/PowerPC/vec_popcnt.ll
@@ -0,0 +1,72 @@
+; Check the vpopcnt* instructions that were added in P8.
+; In addition, check the conversions to/from the v2i64 VMX register that was also added in P8.
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
+
+define <16 x i8> @test_v16i8_v2i64(<2 x i64> %x) nounwind readnone {
+       %tmp = bitcast <2 x i64> %x to <16 x i8>
+       %vcnt = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp)
+       ret <16 x i8> %vcnt
+; CHECK-LABEL: @test_v16i8_v2i64
+; CHECK: vpopcntb 2, 2
+; CHECK: blr
+}
+
+define <8 x i16> @test_v8i16_v2i64(<2 x i64> %x) nounwind readnone {
+       %tmp = bitcast <2 x i64> %x to <8 x i16>
+       %vcnt = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %tmp)
+       ret <8 x i16> %vcnt
+; CHECK-LABEL: @test_v8i16_v2i64
+; CHECK: vpopcnth 2, 2
+; CHECK: blr
+}
+
+define <4 x i32> @test_v4i32_v2i64(<2 x i64> %x) nounwind readnone {
+       %tmp = bitcast <2 x i64> %x to <4 x i32>
+       %vcnt = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %tmp)
+       ret <4 x i32> %vcnt
+; CHECK-LABEL: @test_v4i32_v2i64
+; CHECK: vpopcntw 2, 2
+; CHECK: blr
+}
+
+define <2 x i64> @test_v2i64_v2i64(<2 x i64> %x) nounwind readnone {
+       %vcnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x)
+       ret <2 x i64> %vcnt
+; CHECK-LABEL: @test_v2i64_v2i64
+; CHECK: vpopcntd 2, 2
+; CHECK: blr
+}
+
+define <2 x i64> @test_v2i64_v4i32(<4 x i32> %x) nounwind readnone {
+       %tmp = bitcast <4 x i32> %x to <2 x i64>
+       %vcnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %tmp)
+       ret <2 x i64> %vcnt
+; CHECK-LABEL: @test_v2i64_v4i32
+; CHECK: vpopcntd 2, 2
+; CHECK: blr
+}
+
+
+define <2 x i64> @test_v2i64_v8i16(<8 x i16> %x) nounwind readnone {
+       %tmp = bitcast <8 x i16> %x to <2 x i64>
+       %vcnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %tmp)
+       ret <2 x i64> %vcnt
+; CHECK-LABEL: @test_v2i64_v8i16
+; CHECK: vpopcntd 2, 2
+; CHECK: blr
+}
+
+define <2 x i64> @test_v2i64_v16i8(<16 x i8> %x) nounwind readnone {
+       %tmp = bitcast <16 x i8> %x to <2 x i64>
+       %vcnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %tmp)
+       ret <2 x i64> %vcnt
+; CHECK-LABEL: @test_v2i64_v16i8
+; CHECK: vpopcntd 2, 2
+; CHECK: blr
+}
Index: test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
===================================================================
--- test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
+++ test/MC/Disassembler/PowerPC/ppc64-encoding-vmx.txt
@@ -501,6 +501,18 @@
 # CHECK: vrsqrtefp 2, 3
 0x10 0x40 0x19 0x4a
 
+# CHECK: vpopcntb 2, 3
+0x10 0x40 0x1f 0x03
+
+# CHECK: vpopcnth 2, 3
+0x10 0x40 0x1f 0x43
+
+# CHECK: vpopcntw 2, 3
+0x10 0x40 0x1f 0x83
+
+# CHECK: vpopcntd 2, 3
+0x10 0x40 0x1f 0xc3
+
 # CHECK: mtvscr 2
 0x10 0x00 0x16 0x44
 
Index: test/MC/PowerPC/ppc64-encoding-vmx.s
===================================================================
--- test/MC/PowerPC/ppc64-encoding-vmx.s
+++ test/MC/PowerPC/ppc64-encoding-vmx.s
@@ -543,6 +543,23 @@
 # CHECK-LE: vrsqrtefp 2, 3                   # encoding: [0x4a,0x19,0x40,0x10]
             vrsqrtefp 2, 3
 
+# Vector population count instructions
+# CHECK-BE: vpopcntb 2, 3                    # encoding: [0x10,0x40,0x1f,0x03]
+# CHECK-LE: vpopcntb 2, 3                    # encoding: [0x03,0x1f,0x40,0x10]
+            vpopcntb 2, 3
+
+# CHECK-BE: vpopcnth 2, 3                    # encoding: [0x10,0x40,0x1f,0x43]
+# CHECK-LE: vpopcnth 2, 3                    # encoding: [0x43,0x1f,0x40,0x10]
+            vpopcnth 2, 3
+
+# CHECK-BE: vpopcntw 2, 3                    # encoding: [0x10,0x40,0x1f,0x83]
+# CHECK-LE: vpopcntw 2, 3                    # encoding: [0x83,0x1f,0x40,0x10]
+            vpopcntw 2, 3
+
+# CHECK-BE: vpopcntd 2, 3                    # encoding: [0x10,0x40,0x1f,0xc3]
+# CHECK-LE: vpopcntd 2, 3                    # encoding: [0xc3,0x1f,0x40,0x10]
+            vpopcntd 2, 3
+
 # Vector status and control register instructions
 
 # CHECK-BE: mtvscr 2                         # encoding: [0x10,0x00,0x16,0x44]