Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2662,7 +2662,7 @@ // If CTPOP is legal, use it. Otherwise use shifts and xor. SDValue Result; - if (TLI.isOperationLegal(ISD::CTPOP, VT)) { + if (TLI.isOperationLegalOrCustom(ISD::CTPOP, VT)) { Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); } else { Result = Op; Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -431,6 +431,7 @@ setOperationAction(ISD::PARITY, MVT::i64, Custom); if (Subtarget.hasPOPCNT()) { setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); + setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32); } else { setOperationAction(ISD::CTPOP , MVT::i8 , Expand); setOperationAction(ISD::CTPOP , MVT::i16 , Expand); Index: llvm/test/CodeGen/X86/parity-vec.ll =================================================================== --- llvm/test/CodeGen/X86/parity-vec.ll +++ llvm/test/CodeGen/X86/parity-vec.ll @@ -18,7 +18,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: psllw $7, %xmm0 ; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntw %ax, %ax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrl $8, %ecx +; CHECK-NEXT: xorl %eax, %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: shrl $4, %eax +; CHECK-NEXT: xorl %ecx, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrl $2, %ecx +; CHECK-NEXT: xorl %eax, %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: shrl %eax +; CHECK-NEXT: xorl %ecx, %eax ; CHECK-NEXT: testb $1, %al ; CHECK-NEXT: setne %al ; CHECK-NEXT: retq @@ -33,8 +44,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: psllw $7, %xmm0 ; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntw %ax, %ax -; CHECK-NEXT: # kill: def $al killed $al killed $ax +; CHECK-NEXT: popcntl %eax, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %i1 = bitcast <16 x i1> %x to i16 %i2 = call i16 @llvm.ctpop.i16(i16 %i1) @@ -62,7 +73,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: psllw $7, %xmm0 ; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntw %ax, %ax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrl $8, %ecx +; CHECK-NEXT: xorl %eax, %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: shrl $4, %eax +; CHECK-NEXT: xorl %ecx, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrl $2, %ecx +; CHECK-NEXT: xorl %eax, %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: shrl %eax +; CHECK-NEXT: xorl %ecx, %eax ; CHECK-NEXT: testb $1, %al ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq @@ -77,7 +99,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: psllw $7, %xmm0 ; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntw %ax, %ax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrl $8, %ecx +; CHECK-NEXT: xorl %eax, %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: shrl $4, %eax +; CHECK-NEXT: xorl %ecx, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrl $2, %ecx +; CHECK-NEXT: xorl %eax, %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: shrl %eax +; CHECK-NEXT: xorl %ecx, %eax ; CHECK-NEXT: andl $1, %eax ; CHECK-NEXT: xorb $1, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax Index: llvm/test/CodeGen/X86/parity.ll =================================================================== --- llvm/test/CodeGen/X86/parity.ll +++ llvm/test/CodeGen/X86/parity.ll @@ -59,14 +59,38 @@ ; ; X86-POPCNT-LABEL: parity_16: ; X86-POPCNT: # %bb.0: -; X86-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax +; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: movl %eax, %ecx +; X86-POPCNT-NEXT: shrl $8, %ecx +; X86-POPCNT-NEXT: xorl %eax, %ecx +; X86-POPCNT-NEXT: movl %ecx, %eax +; X86-POPCNT-NEXT: shrl $4, %eax +; X86-POPCNT-NEXT: xorl %ecx, %eax +; X86-POPCNT-NEXT: movl %eax, %ecx +; X86-POPCNT-NEXT: shrl $2, %ecx +; X86-POPCNT-NEXT: xorl %eax, %ecx +; X86-POPCNT-NEXT: movl %ecx, %eax +; X86-POPCNT-NEXT: shrl %eax +; X86-POPCNT-NEXT: xorl %ecx, %eax ; X86-POPCNT-NEXT: andl $1, %eax ; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X86-POPCNT-NEXT: retl ; ; X64-POPCNT-LABEL: parity_16: ; X64-POPCNT: # %bb.0: -; X64-POPCNT-NEXT: popcntw %di, %ax +; X64-POPCNT-NEXT: movl %edi, %eax +; X64-POPCNT-NEXT: movzbl %ah, %ecx +; X64-POPCNT-NEXT: xorl %edi, %ecx +; X64-POPCNT-NEXT: movzwl %cx, %eax +; X64-POPCNT-NEXT: movl %eax, %ecx +; X64-POPCNT-NEXT: shrl $4, %ecx +; X64-POPCNT-NEXT: xorl %eax, %ecx +; X64-POPCNT-NEXT: movl %ecx, %edx +; X64-POPCNT-NEXT: shrl $2, %edx +; X64-POPCNT-NEXT: xorl %ecx, %edx +; X64-POPCNT-NEXT: movl %edx, %eax +; X64-POPCNT-NEXT: shrl %eax +; X64-POPCNT-NEXT: xorl %edx, %eax ; X64-POPCNT-NEXT: andl $1, %eax ; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X64-POPCNT-NEXT: retq @@ -98,14 +122,38 @@ ; X86-POPCNT-LABEL: parity_16_load: ; X86-POPCNT: # %bb.0: ; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-POPCNT-NEXT: popcntw (%eax), %ax +; X86-POPCNT-NEXT: movzwl (%eax), %eax +; X86-POPCNT-NEXT: movl %eax, %ecx +; X86-POPCNT-NEXT: shrl $8, %ecx +; X86-POPCNT-NEXT: xorl %eax, %ecx +; X86-POPCNT-NEXT: movl %ecx, %eax +; X86-POPCNT-NEXT: shrl $4, %eax +; X86-POPCNT-NEXT: xorl %ecx, %eax +; X86-POPCNT-NEXT: movl %eax, %ecx +; X86-POPCNT-NEXT: shrl $2, %ecx +; X86-POPCNT-NEXT: xorl %eax, %ecx +; X86-POPCNT-NEXT: movl %ecx, %eax +; X86-POPCNT-NEXT: shrl %eax +; X86-POPCNT-NEXT: xorl %ecx, %eax ; X86-POPCNT-NEXT: andl $1, %eax ; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X86-POPCNT-NEXT: retl ; ; X64-POPCNT-LABEL: parity_16_load: ; X64-POPCNT: # %bb.0: -; X64-POPCNT-NEXT: popcntw (%rdi), %ax +; X64-POPCNT-NEXT: movzwl (%rdi), %eax +; X64-POPCNT-NEXT: movl %eax, %ecx +; X64-POPCNT-NEXT: shrl $8, %ecx +; X64-POPCNT-NEXT: xorl %eax, %ecx +; X64-POPCNT-NEXT: movl %ecx, %eax +; X64-POPCNT-NEXT: shrl $4, %eax +; X64-POPCNT-NEXT: xorl %ecx, %eax +; X64-POPCNT-NEXT: movl %eax, %ecx +; X64-POPCNT-NEXT: shrl $2, %ecx +; X64-POPCNT-NEXT: xorl %eax, %ecx +; X64-POPCNT-NEXT: movl %ecx, %eax +; X64-POPCNT-NEXT: shrl %eax +; X64-POPCNT-NEXT: xorl %ecx, %eax ; X64-POPCNT-NEXT: andl $1, %eax ; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X64-POPCNT-NEXT: retq @@ -420,7 +468,19 @@ ; ; X86-POPCNT-LABEL: parity_16_shift: ; X86-POPCNT: # %bb.0: -; X86-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax +; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: movl %eax, %ecx +; X86-POPCNT-NEXT: shrl $8, %ecx +; X86-POPCNT-NEXT: xorl %eax, %ecx +; X86-POPCNT-NEXT: movl %ecx, %eax +; X86-POPCNT-NEXT: shrl $4, %eax +; X86-POPCNT-NEXT: xorl %ecx, %eax +; X86-POPCNT-NEXT: movl %eax, %ecx +; X86-POPCNT-NEXT: shrl $2, %ecx +; X86-POPCNT-NEXT: xorl %eax, %ecx +; X86-POPCNT-NEXT: movl %ecx, %eax +; X86-POPCNT-NEXT: shrl %eax +; X86-POPCNT-NEXT: xorl %ecx, %eax ; X86-POPCNT-NEXT: andl $1, %eax ; X86-POPCNT-NEXT: addl %eax, %eax ; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax @@ -428,7 +488,19 @@ ; ; X64-POPCNT-LABEL: parity_16_shift: ; X64-POPCNT: # %bb.0: -; X64-POPCNT-NEXT: popcntw %di, %ax +; X64-POPCNT-NEXT: movl %edi, %eax +; X64-POPCNT-NEXT: movzbl %ah, %ecx +; X64-POPCNT-NEXT: xorl %edi, %ecx +; X64-POPCNT-NEXT: movzwl %cx, %eax +; X64-POPCNT-NEXT: movl %eax, %ecx +; X64-POPCNT-NEXT: shrl $4, %ecx +; X64-POPCNT-NEXT: xorl %eax, %ecx +; X64-POPCNT-NEXT: movl %ecx, %edx +; X64-POPCNT-NEXT: shrl $2, %edx +; X64-POPCNT-NEXT: xorl %ecx, %edx +; X64-POPCNT-NEXT: movl %edx, %eax +; X64-POPCNT-NEXT: shrl %eax +; X64-POPCNT-NEXT: xorl %edx, %eax ; X64-POPCNT-NEXT: andl $1, %eax ; X64-POPCNT-NEXT: addl %eax, %eax ; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax Index: llvm/test/CodeGen/X86/popcnt.ll =================================================================== --- llvm/test/CodeGen/X86/popcnt.ll +++ llvm/test/CodeGen/X86/popcnt.ll @@ -109,12 +109,16 @@ ; ; X86-POPCNT-LABEL: cnt16: ; X86-POPCNT: # %bb.0: -; X86-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax +; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax +; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X86-POPCNT-NEXT: retl ; ; X64-POPCNT-LABEL: cnt16: ; X64-POPCNT: # %bb.0: -; X64-POPCNT-NEXT: popcntw %di, %ax +; X64-POPCNT-NEXT: movzwl %di, %eax +; X64-POPCNT-NEXT: popcntl %eax, %eax +; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X64-POPCNT-NEXT: retq %cnt = tail call i16 @llvm.ctpop.i16(i16 %x) ret i16 %cnt @@ -1566,14 +1570,13 @@ ; ; X86-POPCNT-LABEL: popcount_i16_zext: ; X86-POPCNT: # %bb.0: -; X86-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax -; X86-POPCNT-NEXT: movzwl %ax, %eax +; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax ; X86-POPCNT-NEXT: retl ; ; X64-POPCNT-LABEL: popcount_i16_zext: ; X64-POPCNT: # %bb.0: -; X64-POPCNT-NEXT: popcntw %di, %ax -; X64-POPCNT-NEXT: movzwl %ax, %eax +; X64-POPCNT-NEXT: popcntl %edi, %eax ; X64-POPCNT-NEXT: retq %cnt = tail call i16 @llvm.ctpop.i16(i16 %x) %z = zext i16 %cnt to i32