diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20533,17 +20533,20 @@ unsigned NumElts = VecVT.getVectorNumElements(); // Extending v8i1/v16i1 to 512-bit get better performance on KNL // than extending to 128/256bit. - MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8; - MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts); - SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec); - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ExtEltVT, Ext, Idx); - return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); + if (NumElts != 1) { + MVT ExtEltVT = + (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8; + MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts); + SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec); + SDValue Elt = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ExtEltVT, Ext, Idx); + return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); + } + } else { + if (IdxC->getZExtValue() == 0) // the operation is legal + return Op; } - unsigned IdxVal = IdxC->getZExtValue(); - if (IdxVal == 0) // the operation is legal - return Op; - // Extend to natively supported kshift. unsigned NumElems = VecVT.getVectorNumElements(); MVT WideVecVT = VecVT; @@ -20553,10 +20556,15 @@ DAG.getUNDEF(WideVecVT), Vec, DAG.getIntPtrConstant(0, dl)); } + if (NumElems == 1) { + if (Subtarget.hasDQI()) + return DAG.getBitcast(MVT::i8, Vec); + return DAG.getNode(ISD::TRUNCATE, dl, EltVT, DAG.getBitcast(MVT::i16, Vec)); + } // Use kshiftr instruction to move to the lower element. 
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, - DAG.getTargetConstant(IdxVal, dl, MVT::i8)); + DAG.getTargetConstant(IdxC->getZExtValue(), dl, MVT::i8)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, DAG.getIntPtrConstant(0, dl)); diff --git a/llvm/test/CodeGen/X86/pr64322.ll b/llvm/test/CodeGen/X86/pr64322.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr64322.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server | FileCheck %s + +@G = global <1 x i1> zeroinitializer +@G.1 = global i1 false + +define void @foo(i32 %x) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: movq G@GOTPCREL(%rip), %rax +; CHECK-NEXT: kmovb (%rax), %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: movq G.1@GOTPCREL(%rip), %rcx +; CHECK-NEXT: movb %al, (%rcx) +; CHECK-NEXT: retq + %LGV = load <1 x i1>, ptr @G + %E = extractelement <1 x i1> %LGV, i32 %x + store i1 %E, ptr @G.1 + ret void +}