diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5365,7 +5365,8 @@ } static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) { - return Subtarget.hasVLX() || (Subtarget.hasAVX512() && VT.is512BitVector()); + return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() || + VT.is512BitVector(); } bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, @@ -46230,7 +46231,8 @@ SDValue B = DAG.getBitcast(VT, N0.getOperand(0)); SDValue C = DAG.getBitcast(VT, N1.getOperand(0)); SDValue Imm = DAG.getTargetConstant(0xCA, DL, MVT::i8); - return DAG.getNode(X86ISD::VPTERNLOG, DL, VT, A, B, C, Imm); + return getAVX512Node(X86ISD::VPTERNLOG, DL, VT, {A, B, C, Imm}, DAG, + Subtarget); } SDValue X = N->getOperand(0); diff --git a/llvm/test/CodeGen/X86/combine-bitselect.ll b/llvm/test/CodeGen/X86/combine-bitselect.ll --- a/llvm/test/CodeGen/X86/combine-bitselect.ll +++ b/llvm/test/CodeGen/X86/combine-bitselect.ll @@ -32,9 +32,12 @@ ; ; AVX512F-LABEL: bitselect_v2i64_rr: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vorps %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744069414584319,18446744060824649725] +; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v2i64_rr: @@ -72,10 +75,12 @@ ; ; AVX512F-LABEL: bitselect_v2i64_rm: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovaps (%rdi), %xmm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vorps %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744065119617022,18446744073709551612] +; AVX512F-NEXT: vpternlogq $184, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v2i64_rm: @@ -115,10 +120,12 @@ ; ; AVX512F-LABEL: bitselect_v2i64_mr: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovaps (%rdi), %xmm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vorps %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [12884901890,4294967296] +; AVX512F-NEXT: vpternlogq $184, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v2i64_mr: @@ -161,11 +168,12 @@ ; ; AVX512F-LABEL: bitselect_v2i64_mm: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovaps (%rdi), %xmm0 -; AVX512F-NEXT: vmovaps (%rsi), %xmm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vorps %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512F-NEXT: vmovdqa (%rsi), %xmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744073709551612,18446744065119617022] +; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v2i64_mm: @@ -317,9 +325,11 @@ ; ; AVX512F-LABEL: bitselect_v4i64_rr: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vorps %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [18446744069414584319,18446744060824649725,18446744060824649725,18446744060824649725] +; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v4i64_rr: @@ -365,10 +375,11 @@ ; ; AVX512F-LABEL: bitselect_v4i64_rm: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovaps (%rdi), %ymm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vorps %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612] +; AVX512F-NEXT: vpternlogq $184, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v4i64_rm: @@ -416,10 +427,11 @@ ; ; AVX512F-LABEL: bitselect_v4i64_mr: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovaps (%rdi), %ymm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vorps %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [12884901890,4294967296,12884901890,4294967296] +; AVX512F-NEXT: vpternlogq $184, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v4i64_mr: @@ -467,11 +479,11 @@ ; ; AVX512F-LABEL: bitselect_v4i64_mm: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovaps (%rdi), %ymm0 -; AVX512F-NEXT: vmovaps (%rsi), %ymm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vorps %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512F-NEXT: vmovdqa (%rsi), %ymm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022] +; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v4i64_mm: diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -2859,11 +2859,12 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v16i8: ; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i8: @@ -2875,20 +2876,22 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: ; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -2521,11 +2521,11 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v32i8: ; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v32i8: @@ -2537,20 +2537,20 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: ; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -2090,11 +2090,12 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm2 ; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i8: @@ -2106,11 +2107,12 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm2 ; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: @@ -2122,11 +2124,12 @@ ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm2 ; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -453,20 +453,18 @@ ; ; AVX512F-LABEL: var_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm3 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4 +; AVX512F-NEXT: vpternlogq $226, %zmm3, %zmm2, %zmm4 ; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] +; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm3, %zmm4 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm3 @@ -1756,11 +1754,11 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v32i8: @@ -1772,11 +1770,11 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm2 ; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: @@ -1788,11 +1786,11 @@ ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm2 ; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -2578,11 +2578,12 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v16i8: ; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i8: @@ -2594,20 +2595,22 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: ; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -2199,11 +2199,11 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v32i8: ; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v32i8: @@ -2215,20 +2215,20 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: ; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -2178,11 +2178,12 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm2 ; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i8: @@ -2194,11 +2195,12 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm2 ; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: @@ -2210,11 +2212,12 @@ ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm2 ; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -490,22 +490,20 @@ ; ; AVX512F-LABEL: var_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm3 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpsubb %ymm1, %ymm3, %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4 +; AVX512F-NEXT: vpternlogq $226, %zmm3, %zmm2, %zmm4 +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpsubb %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] +; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm3, %zmm4 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm3 @@ -1853,11 +1851,11 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v32i8: @@ -1869,11 +1867,11 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: @@ -1885,11 +1883,11 @@ ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -2060,10 +2060,11 @@ ; AVX512F-LABEL: splatconstant_rotate_v16i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1 +; AVX512F-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_rotate_v16i8: @@ -2076,10 +2077,11 @@ ; AVX512BW-LABEL: splatconstant_rotate_v16i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX512BW-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1 +; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_rotate_v16i8: @@ -2092,10 +2094,11 @@ ; AVX512VBMI2-LABEL: splatconstant_rotate_v16i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_rotate_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll --- a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -449,20 +449,18 @@ ; ; AVX512F-LABEL: var_rotate_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm3 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4 +; AVX512F-NEXT: vpternlogq $226, %zmm3, %zmm2, %zmm4 ; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] +; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm3, %zmm4 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm3 @@ -1745,11 +1743,11 @@ ; ; AVX512F-LABEL: splatconstant_rotate_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_rotate_v32i8: @@ -1762,10 +1760,10 @@ ; AVX512BW-LABEL: splatconstant_rotate_v32i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX512BW-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1 +; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_rotate_v32i8: @@ -1778,10 +1776,10 @@ ; AVX512VBMI2-LABEL: splatconstant_rotate_v32i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1 +; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_rotate_v32i8: @@ -2103,11 +2101,10 @@ ; ; AVX512F-LABEL: splatconstant_rotate_mask_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512F-NEXT: retq ;