diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1039,11 +1039,19 @@ break; assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!"); - SDValue Blendv = - CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), - N->getOperand(0), N->getOperand(1), N->getOperand(2)); + SDValue R; + if (Subtarget->hasVLX() && Subtarget->hasAVX512() && + N->getOperand(0)->getOpcode() == X86ISD::PCMPGT) { + R = CurDAG->getNode(X86ISD::VPTERNLOG, SDLoc(N), N->getValueType(0), + N->getOperand(0), N->getOperand(1), N->getOperand(2), + CurDAG->getTargetConstant(0xCA, SDLoc(N), MVT::i8)); + } else { + R = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), + N->getOperand(0), N->getOperand(1), + N->getOperand(2)); + } --I; - CurDAG->ReplaceAllUsesWith(N, Blendv.getNode()); + CurDAG->ReplaceAllUsesWith(N, R.getNode()); ++I; MadeChange = true; continue; diff --git a/llvm/test/CodeGen/X86/abds-vector-128.ll b/llvm/test/CodeGen/X86/abds-vector-128.ll --- a/llvm/test/CodeGen/X86/abds-vector-128.ll +++ b/llvm/test/CodeGen/X86/abds-vector-128.ll @@ -829,7 +829,7 @@ ; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm2 ; AVX512-NEXT: vpsubb %xmm0, %xmm1, %xmm3 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpblendvb %xmm0, %xmm2, %xmm3, %xmm0 +; AVX512-NEXT: vpternlogq $202, %xmm3, %xmm2, %xmm0 ; AVX512-NEXT: retq %cmp = icmp sgt <16 x i8> %a, %b %ab = sub <16 x i8> %a, %b diff --git a/llvm/test/CodeGen/X86/abds-vector-256.ll b/llvm/test/CodeGen/X86/abds-vector-256.ll --- a/llvm/test/CodeGen/X86/abds-vector-256.ll +++ b/llvm/test/CodeGen/X86/abds-vector-256.ll @@ -468,7 +468,7 @@ ; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm2 ; AVX512-NEXT: vpsubb %ymm0, %ymm1, %ymm3 ; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0 +; AVX512-NEXT: vpternlogq $202, %ymm3, %ymm2, %ymm0 ; AVX512-NEXT: retq %cmp = icmp sgt <32 x i8> %a, %b %ab = sub <32 x i8> %a, %b diff --git a/llvm/test/CodeGen/X86/var-permute-256.ll b/llvm/test/CodeGen/X86/var-permute-256.ll --- a/llvm/test/CodeGen/X86/var-permute-256.ll +++ b/llvm/test/CodeGen/X86/var-permute-256.ll @@ -194,12 +194,12 @@ ; AVX512VLDQ: # %bb.0: ; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512VLDQ-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3] +; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 ; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm2, %ymm2 -; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3 +; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLDQ-NEXT: vpternlogq $202, %ymm2, %ymm3, %ymm0 ; AVX512VLDQ-NEXT: retq ; ; AVX512VLBW-LABEL: var_shuffle_v16i16: @@ -313,9 +313,9 @@ ; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3] ; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm2, %ymm2 ; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3 +; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLDQ-NEXT: vpternlogq $202, %ymm3, %ymm2, %ymm0 ; AVX512VLDQ-NEXT: retq ; ; AVX512VLBW-LABEL: var_shuffle_v32i8: @@ -739,11 +739,11 @@ ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 ; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512VLDQ-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2 ; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2 +; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3 +; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLDQ-NEXT: vpternlogq $202, %ymm2, %ymm3, %ymm0 ; AVX512VLDQ-NEXT: retq ; ; AVX512VLBW-LABEL: var_shuffle_v16i16_from_v8i16: @@ -857,9 +857,9 @@ ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 ; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2 ; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3 +; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLDQ-NEXT: vpternlogq $202, %ymm3, %ymm2, %ymm0 ; AVX512VLDQ-NEXT: retq ; ; AVX512VLBW-LABEL: var_shuffle_v32i8_from_v16i8: