Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12212,7 +12212,8 @@ /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, - SelectionDAG &DAG) { + SelectionDAG &DAG, + CombineLevel Level) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -12237,10 +12238,14 @@ else if (Opcode == ISD::ZERO_EXTEND) ExtLoadOpcode = ISD::ZEXTLOAD; + // An illegal VSELECT may fail instruction selection if it is formed after + // legalization (DAG Combine2), so conservatively check the OperationAction. LoadSDNode *Load1 = cast<LoadSDNode>(Op1); LoadSDNode *Load2 = cast<LoadSDNode>(Op2); if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) || - !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT())) + !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) || + (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes && + TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal)) return SDValue(); SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1); @@ -13108,7 +13113,7 @@ return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); } - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) return Res; return SDValue(); @@ -13459,7 +13464,7 @@ if (SDValue V = widenAbs(N, DAG)) return V; - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) return Res; return SDValue(); @@ -13620,7 +13625,7 @@ if (SDValue NewCtPop = widenCtPop(N, DAG)) return NewCtPop; - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + if (SDValue Res = 
tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) return Res; return SDValue(); Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1039,6 +1039,8 @@ break; assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!"); + assert(N->getValueType(0).getVectorElementType() != MVT::i16 && + "We can't replace VSELECT with BLENDV in vXi16!"); SDValue Blendv = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), N->getOperand(0), N->getOperand(1), N->getOperand(2)); Index: llvm/test/CodeGen/X86/vselect-post-combine.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/vselect-post-combine.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 + +define ptr @test_mul(<32 x i8> %vec0) { +; AVX2-LABEL: test_mul: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = [255,0,0,0] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX2-NEXT: vpblendvb %xmm0, (%rcx), %xmm1, %xmm0 +; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX2-NEXT: vmovdqu %ymm0, 0 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +entry: + %vec1 = shufflevector <32 x i8> %vec0, <32 x i8> , <32 x i32> + %0 = bitcast <32 x i8> %vec1 to <4 x i64> + %shuffle.i.i.i6.i.i = shufflevector <4 x i64> %0, <4 x i64> zeroinitializer, <2 x i32> + %1 = bitcast <2 x i64> %shuffle.i.i.i6.i.i to <16 x i8> + %conv = zext <16 x i8> %1 to <16 x i16> + store <16 x i16> %conv, ptr null, align 1 + ret ptr null +}