diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34113,7 +34113,7 @@
     Results.push_back(V);
     return;
   }
-  case ISD::BITREVERSE:
+  case ISD::BITREVERSE: {
     assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
     assert(Subtarget.hasXOP() && "Expected XOP");
     // We can use VPPERM by copying to a vector register and back. We'll need
@@ -34121,6 +34121,21 @@
     Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG));
     return;
   }
+  case ISD::EXTRACT_VECTOR_ELT: {
+    // f16 = extract vXf16 %vec, i64 %idx
+    assert(N->getSimpleValueType(0) == MVT::f16 &&
+           "Unexpected Value type of EXTRACT_VECTOR_ELT!");
+    assert(Subtarget.hasFP16() && "Expected FP16");
+    SDValue VecOp = N->getOperand(0);
+    EVT ExtVT = VecOp.getValueType().changeVectorElementTypeToInteger();
+    SDValue Split = DAG.getBitcast(ExtVT, N->getOperand(0));
+    Split = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Split,
+                        N->getOperand(1));
+    Split = DAG.getBitcast(MVT::f16, Split);
+    Results.push_back(Split);
+    return;
+  }
+  }
 }
 
 const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -1349,6 +1349,74 @@
   ret half %res
 }
 
+define half @extract_f16_8(<32 x half> %x, i64 %idx) nounwind {
+; X64-LABEL: extract_f16_8:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    movq %rsp, %rbp
+; X64-NEXT:    andq $-64, %rsp
+; X64-NEXT:    subq $128, %rsp
+; X64-NEXT:    andl $31, %edi
+; X64-NEXT:    vmovaps %zmm0, (%rsp)
+; X64-NEXT:    vmovsh (%rsp,%rdi,2), %xmm0
+; X64-NEXT:    movq %rbp, %rsp
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+;
+; X86-LABEL: extract_f16_8:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    andl $-64, %esp
+; X86-NEXT:    subl $128, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    andl $31, %eax
+; X86-NEXT:    vmovaps %zmm0, (%esp)
+; X86-NEXT:    vmovsh (%esp,%eax,2), %xmm0
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+  %res = extractelement <32 x half> %x, i64 %idx
+  ret half %res
+}
+
+define half @extract_f16_9(<64 x half> %x, i64 %idx) nounwind {
+; X64-LABEL: extract_f16_9:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    movq %rsp, %rbp
+; X64-NEXT:    andq $-64, %rsp
+; X64-NEXT:    subq $192, %rsp
+; X64-NEXT:    andl $63, %edi
+; X64-NEXT:    vmovaps %zmm1, {{[0-9]+}}(%rsp)
+; X64-NEXT:    vmovaps %zmm0, (%rsp)
+; X64-NEXT:    vmovsh (%rsp,%rdi,2), %xmm0
+; X64-NEXT:    movq %rbp, %rsp
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+;
+; X86-LABEL: extract_f16_9:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    andl $-64, %esp
+; X86-NEXT:    subl $192, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    andl $63, %eax
+; X86-NEXT:    vmovaps %zmm1, {{[0-9]+}}(%esp)
+; X86-NEXT:    vmovaps %zmm0, (%esp)
+; X86-NEXT:    vmovsh (%esp,%eax,2), %xmm0
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+  %res = extractelement <64 x half> %x, i64 %idx
+  ret half %res
+}
+
 define i16 @extract_i16_0(<8 x i16> %x) {
 ; CHECK-LABEL: extract_i16_0:
 ; CHECK:       # %bb.0:
@@ -1985,10 +2053,10 @@
 ; X64-NEXT:    vmovdqu16 %xmm0, %xmm0 {%k1} {z}
 ; X64-NEXT:    vmovw %xmm0, %eax
 ; X64-NEXT:    testw %ax, %ax
-; X64-NEXT:    je .LBB121_2
+; X64-NEXT:    je .LBB123_2
 ; X64-NEXT:  # %bb.1: # %for.body.preheader
 ; X64-NEXT:    movb $0, (%rsi)
-; X64-NEXT:  .LBB121_2: # %for.end
+; X64-NEXT:  .LBB123_2: # %for.end
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: pr52560:
@@ -2000,11 +2068,11 @@
 ; X86-NEXT:    vmovdqu16 %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    vmovw %xmm0, %eax
 ; X86-NEXT:    testw %ax, %ax
-; X86-NEXT:    je .LBB121_2
+; X86-NEXT:    je .LBB123_2
 ; X86-NEXT:  # %bb.1: # %for.body.preheader
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movb $0, (%eax)
-; X86-NEXT:  .LBB121_2: # %for.end
+; X86-NEXT:  .LBB123_2: # %for.end
 ; X86-NEXT:    retl
 entry:
   %conv = sext i8 %0 to i16
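
Note (not part of the patch): to reproduce the new lowering outside the full test file, a reduced version of extract_f16_8 is sketched below; the llc invocation is an assumption, since avx512fp16-mov.ll carries its own RUN lines and CHECK prefixes. The new ReplaceNodeResults case bitcasts the vXf16 source to the matching integer vector, extracts an i16 element, and bitcasts the scalar back to f16; with a variable index this shows up as the vector spill plus vmovsh reload seen in the checks above.

; Assumed invocation: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 reduced.ll
; Variable-index f16 extraction, mirroring extract_f16_8 from the patch above.
define half @extract_var(<32 x half> %x, i64 %idx) nounwind {
  %res = extractelement <32 x half> %x, i64 %idx
  ret half %res
}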