diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6449,7 +6449,7 @@ // Just widen the subvector by inserting into an undef wide vector. if (Widen) - Op = widenSubVector(Op, false, Subtarget, DAG, DL, 512); + Op = widenSubVector(Op, false, Subtarget, DAG, DL, 512); } SDValue Res = DAG.getNode(Opcode, DL, DstVT, SrcOps); diff --git a/llvm/test/CodeGen/X86/avx512fp16-arith.ll b/llvm/test/CodeGen/X86/avx512fp16-arith.ll --- a/llvm/test/CodeGen/X86/avx512fp16-arith.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-arith.ll @@ -370,8 +370,7 @@ define <8 x half> @fcopysignv8f16(<8 x half> %x, <8 x half> %y) { ; CHECK-LABEL: fcopysignv8f16: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] -; CHECK-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 ; CHECK-NEXT: retq %a = call <8 x half> @llvm.copysign.v8f16(<8 x half> %x, <8 x half> %y) ret <8 x half> %a @@ -412,8 +411,7 @@ define <16 x half> @fcopysignv16f16(<16 x half> %x, <16 x half> %y) { ; CHECK-LABEL: fcopysignv16f16: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] -; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; CHECK-NEXT: retq %a = call <16 x half> @llvm.copysign.v16f16(<16 x half> %x, <16 x half> %y) ret <16 x half> %a @@ -454,8 +452,7 @@ define <32 x half> @fcopysignv32f16(<32 x half> %x, <32 x half> %y) { ; CHECK-LABEL: fcopysignv32f16: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] -; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; CHECK-NEXT: retq %a = call <32 x half> @llvm.copysign.v32f16(<32 x half> %x, <32 x half> %y) ret <32 x half> %a diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -1666,12 +1666,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vpsllw $4, %ymm0, %ymm2 ; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm3 -; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; CHECK-NEXT: vpsllw $5, %ymm1, %ymm1 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; CHECK-NEXT: vpsllw $2, %ymm0, %ymm2 ; CHECK-NEXT: vpsrlw $6, %ymm0, %ymm3 -; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; CHECK-NEXT: vpsrlw $7, %ymm0, %ymm2 @@ -1784,7 +1784,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vpsllw $4, %ymm0, %ymm1 ; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm0 -; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; CHECK-NEXT: retq %shl = shl <32 x i8> %a, %lshr = lshr <32 x i8> %a, @@ -1797,7 +1797,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vpsllw $4, %ymm0, %ymm1 ; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm0 -; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; CHECK-NEXT: retq %shl = shl <32 x i8> %a, diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -2859,10 +2859,9 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -2876,20 +2875,18 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -2521,10 +2521,9 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; @@ -2532,24 +2531,22 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; @@ -2557,14 +2554,14 @@ ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll --- a/llvm/test/CodeGen/X86/vector-fshl-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll @@ -1412,7 +1412,7 @@ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v64i8: @@ -1425,35 +1425,35 @@ ; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VLVBMI2-NEXT: retq %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> ) ret <64 x i8> %res diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -2090,10 +2090,9 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm2 -; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -2107,10 +2106,9 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm2 -; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2124,10 +2122,9 @@ ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm2 -; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -453,18 +453,16 @@ ; ; AVX512F-LABEL: var_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpternlogq $226, %zmm3, %zmm2, %zmm4 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3 ; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] -; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm3, %zmm4 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm3 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm3 @@ -477,12 +475,12 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2 @@ -1754,10 +1752,9 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; @@ -1765,15 +1762,14 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; @@ -1781,15 +1777,14 @@ ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; @@ -1797,7 +1792,7 @@ ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -1039,7 +1039,7 @@ ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v64i8: @@ -1051,35 +1051,35 @@ ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VLVBMI2-NEXT: retq %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> ) ret <64 x i8> %res diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -2578,10 +2578,9 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -2595,20 +2594,18 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -2199,10 +2199,9 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; @@ -2210,24 +2209,22 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; @@ -2235,14 +2232,14 @@ ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -1394,7 +1394,7 @@ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v64i8: @@ -1407,35 +1407,35 @@ ; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VLVBMI2-NEXT: retq %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> ) ret <64 x i8> %res diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -2178,10 +2178,9 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm2 -; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -2195,10 +2194,9 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm2 -; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2212,10 +2210,9 @@ ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm2 -; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -490,20 +490,18 @@ ; ; AVX512F-LABEL: var_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpternlogq $226, %zmm3, %zmm2, %zmm4 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpsubb %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] -; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm3, %zmm4 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm3 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm3 @@ -516,14 +514,14 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VL-NEXT: vpsubb %ymm1, %ymm2, %ymm1 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2 @@ -1851,10 +1849,9 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; @@ -1862,15 +1859,14 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm2 -; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; @@ -1878,15 +1874,14 @@ ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1 ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 -; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; @@ -1894,7 +1889,7 @@ ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm1 ; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -1063,7 +1063,7 @@ ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm2 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v64i8: @@ -1075,35 +1075,35 @@ ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1 ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm1 ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm1 ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VLVBMI2-NEXT: retq %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> ) ret <64 x i8> %res diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -2060,10 +2060,9 @@ ; AVX512F-LABEL: splatconstant_rotate_v16i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1 -; AVX512F-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -2077,10 +2076,9 @@ ; AVX512BW-LABEL: splatconstant_rotate_v16i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1 -; AVX512BW-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -2094,10 +2092,9 @@ ; AVX512VBMI2-LABEL: splatconstant_rotate_v16i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1 -; AVX512VBMI2-NEXT: vmovdqa %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll --- a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -449,18 +449,16 @@ ; ; AVX512F-LABEL: var_rotate_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpternlogq $226, %zmm3, %zmm2, %zmm4 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3 ; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252] -; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm3, %zmm4 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm3 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm3 @@ -473,12 +471,12 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2 @@ -1743,10 +1741,9 @@ ; ; AVX512F-LABEL: splatconstant_rotate_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; @@ -1754,39 +1751,37 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_v32i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1 -; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_rotate_v32i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_rotate_v32i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] ; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpternlogq $226, %zmm0, %zmm2, %zmm1 -; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_rotate_v32i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_rotate_v32i8: @@ -2101,10 +2096,9 @@ ; ; AVX512F-LABEL: splatconstant_rotate_mask_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpternlogq $226, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512F-NEXT: retq ; @@ -2112,7 +2106,7 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll --- a/llvm/test/CodeGen/X86/vector-rotate-512.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll @@ -992,7 +992,7 @@ ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_rotate_v64i8: @@ -1004,35 +1004,35 @@ ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_rotate_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_rotate_v64i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_rotate_v64i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VLVBMI2-NEXT: retq %shl = shl <64 x i8> %a, %lshr = lshr <64 x i8> %a,