diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5365,7 +5365,8 @@ } static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) { - return Subtarget.hasVLX() || (Subtarget.hasAVX512() && VT.is512BitVector()); + return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() || + VT.is512BitVector(); } bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, @@ -6399,29 +6400,64 @@ ArrayRef Ops, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert(Subtarget.hasAVX512() && "AVX512 target expected"); + MVT SVT = VT.getScalarType(); - // If we have VLX or the type is already 512-bits, then create the node - // directly. - if (Subtarget.hasVLX() || VT.is512BitVector()) - return DAG.getNode(Opcode, DL, VT, Ops); + // If we have a 32/64 splatted constant, splat it to DstTy to + // encourage a foldable broadcast'd operand. + auto MakeBroadcastOp = [&](SDValue Op, MVT OpVT, MVT DstVT) { + unsigned OpEltSizeInBits = OpVT.getScalarSizeInBits(); + // AVX512 broadcasts 32/64-bit operands. + // TODO: Support float once getAVX512Node is used by fp-ops. + if (!OpVT.isInteger() || OpEltSizeInBits < 32 || + !DAG.getTargetLoweringInfo().isTypeLegal(SVT)) + return SDValue(); + // If we're not widening, don't bother if we're not bitcasting. + if (OpVT == DstVT && Op.getOpcode() != ISD::BITCAST) + return SDValue(); + if (auto *BV = dyn_cast(peekThroughBitcasts(Op))) { + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs, OpEltSizeInBits) && + !HasAnyUndefs && SplatValue.getBitWidth() == OpEltSizeInBits) { + return DAG.getConstant(SplatValue, DL, DstVT); + } + } + return SDValue(); + }; - // Widen the vector ops. - MVT SVT = VT.getScalarType(); - MVT WideVT = MVT::getVectorVT(SVT, 512 / SVT.getSizeInBits()); - SmallVector WideOps(Ops.begin(), Ops.end()); - for (SDValue &Op : WideOps) { + bool Widen = !(Subtarget.hasVLX() || VT.is512BitVector()); + + MVT DstVT = VT; + if (Widen) + DstVT = MVT::getVectorVT(SVT, 512 / SVT.getSizeInBits()); + + // Canonicalize src operands. + SmallVector SrcOps(Ops.begin(), Ops.end()); + for (SDValue &Op : SrcOps) { MVT OpVT = Op.getSimpleValueType(); // Just pass through scalar operands. if (!OpVT.isVector()) continue; - assert(OpVT.getSizeInBits() == VT.getSizeInBits() && - "Vector size mismatch"); - Op = widenSubVector(Op, false, Subtarget, DAG, DL, 512); + assert(OpVT == VT && "Vector type mismatch"); + + if (SDValue BroadcastOp = MakeBroadcastOp(Op, OpVT, DstVT)) { + Op = BroadcastOp; + continue; + } + + // Just widen the subvector by inserting into an undef wide vector. + if (Widen) + Op = widenSubVector(Op, false, Subtarget, DAG, DL, 512); } + SDValue Res = DAG.getNode(Opcode, DL, DstVT, SrcOps); + // Perform the 512-bit op then extract the bottom subvector. - SDValue Res = DAG.getNode(Opcode, DL, WideVT, WideOps); - return extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits()); + if (Widen) + Res = extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits()); + return Res; } /// Insert i1-subvector to i1-vector. @@ -46190,7 +46226,8 @@ assert(N->getOpcode() == ISD::OR && "Unexpected Opcode"); MVT VT = N->getSimpleValueType(0); - if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0) + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + if (!VT.isVector() || (EltSizeInBits % 8) != 0) return SDValue(); SDValue N0 = peekThroughBitcasts(N->getOperand(0)); @@ -46226,11 +46263,17 @@ if (useVPTERNLOG(Subtarget, VT)) { // Emit a VPTERNLOG node directly - 0xCA is the imm code for A?B:C. - SDValue A = DAG.getBitcast(VT, N0.getOperand(1)); - SDValue B = DAG.getBitcast(VT, N0.getOperand(0)); - SDValue C = DAG.getBitcast(VT, N1.getOperand(0)); + // VPTERNLOG is only available as vXi32/64-bit types. + MVT OpSVT = EltSizeInBits == 32 ? MVT::i32 : MVT::i64; + MVT OpVT = + MVT::getVectorVT(OpSVT, VT.getSizeInBits() / OpSVT.getSizeInBits()); + SDValue A = DAG.getBitcast(OpVT, N0.getOperand(1)); + SDValue B = DAG.getBitcast(OpVT, N0.getOperand(0)); + SDValue C = DAG.getBitcast(OpVT, N1.getOperand(0)); SDValue Imm = DAG.getTargetConstant(0xCA, DL, MVT::i8); - return DAG.getNode(X86ISD::VPTERNLOG, DL, VT, A, B, C, Imm); + SDValue Res = getAVX512Node(X86ISD::VPTERNLOG, DL, OpVT, {A, B, C, Imm}, + DAG, Subtarget); + return DAG.getBitcast(VT, Res); } SDValue X = N->getOperand(0); diff --git a/llvm/test/CodeGen/X86/avx512fp16-arith.ll b/llvm/test/CodeGen/X86/avx512fp16-arith.ll --- a/llvm/test/CodeGen/X86/avx512fp16-arith.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-arith.ll @@ -370,8 +370,7 @@ define <8 x half> @fcopysignv8f16(<8 x half> %x, <8 x half> %y) { ; CHECK-LABEL: fcopysignv8f16: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] -; CHECK-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 ; CHECK-NEXT: retq %a = call <8 x half> @llvm.copysign.v8f16(<8 x half> %x, <8 x half> %y) ret <8 x half> %a @@ -412,8 +411,7 @@ define <16 x half> @fcopysignv16f16(<16 x half> %x, <16 x half> %y) { ; CHECK-LABEL: fcopysignv16f16: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] -; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; CHECK-NEXT: retq %a = call <16 x half> @llvm.copysign.v16f16(<16 x half> %x, <16 x half> %y) ret <16 x half> %a @@ -454,8 +452,7 @@ define <32 x half> @fcopysignv32f16(<32 x half> %x, <32 x half> %y) { ; CHECK-LABEL: fcopysignv32f16: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] -; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; CHECK-NEXT: retq %a = call <32 x half> @llvm.copysign.v32f16(<32 x half> %x, <32 x half> %y) ret <32 x half> %a diff --git a/llvm/test/CodeGen/X86/combine-bitselect.ll b/llvm/test/CodeGen/X86/combine-bitselect.ll --- a/llvm/test/CodeGen/X86/combine-bitselect.ll +++ b/llvm/test/CodeGen/X86/combine-bitselect.ll @@ -32,9 +32,12 @@ ; ; AVX512F-LABEL: bitselect_v2i64_rr: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vorps %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744069414584319,18446744060824649725] +; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v2i64_rr: @@ -72,10 +75,12 @@ ; ; AVX512F-LABEL: bitselect_v2i64_rm: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovaps (%rdi), %xmm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vorps %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744065119617022,18446744073709551612] +; AVX512F-NEXT: vpternlogq $184, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v2i64_rm: @@ -115,10 +120,12 @@ ; ; AVX512F-LABEL: bitselect_v2i64_mr: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovaps (%rdi), %xmm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vorps %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [12884901890,4294967296] +; AVX512F-NEXT: vpternlogq $184, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v2i64_mr: @@ -161,11 +168,12 @@ ; ; AVX512F-LABEL: bitselect_v2i64_mm: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovaps (%rdi), %xmm0 -; AVX512F-NEXT: vmovaps (%rsi), %xmm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vorps %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: vmovdqa (%rdi), %xmm1 +; AVX512F-NEXT: vmovdqa (%rsi), %xmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [18446744073709551612,18446744065119617022] +; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v2i64_mm: @@ -317,9 +325,11 @@ ; ; AVX512F-LABEL: bitselect_v4i64_rr: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vorps %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [18446744069414584319,18446744060824649725,18446744060824649725,18446744060824649725] +; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v4i64_rr: @@ -365,10 +375,11 @@ ; ; AVX512F-LABEL: bitselect_v4i64_rm: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovaps (%rdi), %ymm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vorps %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612] +; AVX512F-NEXT: vpternlogq $184, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v4i64_rm: @@ -416,10 +427,11 @@ ; ; AVX512F-LABEL: bitselect_v4i64_mr: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovaps (%rdi), %ymm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vorps %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [12884901890,4294967296,12884901890,4294967296] +; AVX512F-NEXT: vpternlogq $184, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v4i64_mr: @@ -467,11 +479,11 @@ ; ; AVX512F-LABEL: bitselect_v4i64_mm: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovaps (%rdi), %ymm0 -; AVX512F-NEXT: vmovaps (%rsi), %ymm1 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vorps %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512F-NEXT: vmovdqa (%rsi), %ymm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022] +; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: bitselect_v4i64_mm: diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -1666,12 +1666,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vpsllw $4, %ymm0, %ymm2 ; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm3 -; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; CHECK-NEXT: vpsllw $5, %ymm1, %ymm1 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; CHECK-NEXT: vpsllw $2, %ymm0, %ymm2 ; CHECK-NEXT: vpsrlw $6, %ymm0, %ymm3 -; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; CHECK-NEXT: vpsrlw $7, %ymm0, %ymm2 @@ -1784,7 +1784,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vpsllw $4, %ymm0, %ymm1 ; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm0 -; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; CHECK-NEXT: retq %shl = shl <32 x i8> %a, %lshr = lshr <32 x i8> %a, @@ -1797,7 +1797,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vpsllw $4, %ymm0, %ymm1 ; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm0 -; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; CHECK-NEXT: retq %shl = shl <32 x i8> %a, diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -2859,11 +2859,11 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i8: @@ -2875,20 +2875,20 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -2521,50 +2521,47 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll --- a/llvm/test/CodeGen/X86/vector-fshl-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll @@ -1412,7 +1412,7 @@ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v64i8: @@ -1425,35 +1425,35 @@ ; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VLVBMI2-NEXT: retq %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> ) ret <64 x i8> %res diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -2090,11 +2090,11 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i8: @@ -2106,11 +2106,11 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: @@ -2122,11 +2122,11 @@ ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -453,20 +453,16 @@ ; ; AVX512F-LABEL: var_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm3 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3 ; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm3 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm3 @@ -479,12 +475,12 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2 @@ -1756,50 +1752,47 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll --- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -1039,7 +1039,7 @@ ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v64i8: @@ -1051,35 +1051,35 @@ ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VLVBMI2-NEXT: retq %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> ) ret <64 x i8> %res diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -2578,11 +2578,11 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i8: @@ -2594,20 +2594,20 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm2 +; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -2199,50 +2199,47 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512BW-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -1394,7 +1394,7 @@ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v64i8: @@ -1407,35 +1407,35 @@ ; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0 ; AVX512VLVBMI2-NEXT: retq %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> ) ret <64 x i8> %res diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -2178,11 +2178,11 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i8: @@ -2194,11 +2194,11 @@ ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8: @@ -2210,11 +2210,11 @@ ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm1 +; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -490,22 +490,18 @@ ; ; AVX512F-LABEL: var_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm3 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpsubb %ymm1, %ymm3, %ymm1 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3 +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpsubb %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm3 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm3 @@ -518,14 +514,14 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VL-NEXT: vpsubb %ymm1, %ymm2, %ymm1 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2 @@ -1853,50 +1849,47 @@ ; ; AVX512F-LABEL: splatconstant_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1 ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VBMI2: # %bb.0: -; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm1 +; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm1 ; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -1063,7 +1063,7 @@ ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm2 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v64i8: @@ -1075,35 +1075,35 @@ ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1 ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm1 ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm1 ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VLVBMI2-NEXT: retq %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> ) ret <64 x i8> %res diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -2060,10 +2060,10 @@ ; AVX512F-LABEL: splatconstant_rotate_v16i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_rotate_v16i8: @@ -2076,10 +2076,10 @@ ; AVX512BW-LABEL: splatconstant_rotate_v16i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_rotate_v16i8: @@ -2092,10 +2092,10 @@ ; AVX512VBMI2-LABEL: splatconstant_rotate_v16i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VBMI2-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI2-NEXT: vzeroupper ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_rotate_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll --- a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -449,20 +449,16 @@ ; ; AVX512F-LABEL: var_rotate_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm3 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3 ; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm2 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm3 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512F-NEXT: vpor %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm3 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm3 ; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm2 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm3 @@ -475,12 +471,12 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2 ; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2 @@ -1745,50 +1741,47 @@ ; ; AVX512F-LABEL: splatconstant_rotate_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_rotate_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_v32i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512BW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_rotate_v32i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_rotate_v32i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512VBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512VBMI2-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 +; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_rotate_v32i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VLVBMI2-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_rotate_v32i8: @@ -2103,11 +2096,9 @@ ; ; AVX512F-LABEL: splatconstant_rotate_mask_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512F-NEXT: retq ; @@ -2115,7 +2106,7 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll --- a/llvm/test/CodeGen/X86/vector-rotate-512.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll @@ -992,7 +992,7 @@ ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_rotate_v64i8: @@ -1004,35 +1004,35 @@ ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_rotate_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_rotate_v64i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VBMI2-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_rotate_v64i8: ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm1 ; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 +; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0 ; AVX512VLVBMI2-NEXT: retq %shl = shl <64 x i8> %a, %lshr = lshr <64 x i8> %a,