diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1725,11 +1725,42 @@
   case ISD::ROTR: {
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
+    bool IsROTL = (Op.getOpcode() == ISD::ROTL);
 
     // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
     if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
       return TLO.CombineTo(Op, Op0);
 
+    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
+      unsigned Amt = SA->getAPIntValue().urem(BitWidth).getZExtValue();
+      unsigned RevAmt = BitWidth - Amt;
+
+      // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
+      // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
+      APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
+      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
+                               Depth + 1))
+        return true;
+
+      // rot*(x, 0) --> x
+      if (Amt == 0)
+        return TLO.CombineTo(Op, Op0);
+
+      // See if we don't demand either half of the rotated bits.
+      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
+          DemandedBits.countTrailingZeros() >= (IsROTL ? RevAmt : Amt)) {
+        if (!IsROTL)
+          Op1 = TLO.DAG.getConstant(RevAmt, dl, Op1.getValueType());
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
+      }
+      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
+          DemandedBits.countLeadingZeros() >= (IsROTL ? Amt : RevAmt)) {
+        if (IsROTL)
+          Op1 = TLO.DAG.getConstant(RevAmt, dl, Op1.getValueType());
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
+      }
+    }
+
     // For pow-2 bitwidths we only demand the bottom modulo amt bits.
     if (isPowerOf2_32(BitWidth)) {
       APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -17049,18 +17049,6 @@
                                       const ARMSubtarget *ST) {
   SelectionDAG &DAG = DCI.DAG;
   EVT VT = N->getValueType(0);
-  if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
-    // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
-    // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
-    SDValue N1 = N->getOperand(1);
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
-      SDValue N0 = N->getOperand(0);
-      if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
-          DAG.MaskedValueIsZero(N0.getOperand(0),
-                                APInt::getHighBitsSet(32, 16)))
-        return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
-    }
-  }
 
   if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
       N->getOperand(0)->getOpcode() == ISD::AND &&
diff --git a/llvm/test/CodeGen/AArch64/rotate-extract.ll b/llvm/test/CodeGen/AArch64/rotate-extract.ll
--- a/llvm/test/CodeGen/AArch64/rotate-extract.ll
+++ b/llvm/test/CodeGen/AArch64/rotate-extract.ll
@@ -67,8 +67,7 @@
 ; CHECK-LABEL: ror_extract_mul_with_mask:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add x8, x0, x0, lsl #3
-; CHECK-NEXT: ror x8, x8, #57
-; CHECK-NEXT: and x0, x8, #0xff
+; CHECK-NEXT: lsr x0, x8, #57
 ; CHECK-NEXT: ret
   %lhs_mul = mul i64 %i, 1152
   %rhs_mul = mul i64 %i, 9
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq.ll b/llvm/test/CodeGen/AArch64/urem-seteq.ll
--- a/llvm/test/CodeGen/AArch64/urem-seteq.ll
+++ b/llvm/test/CodeGen/AArch64/urem-seteq.ll
@@ -78,12 +78,8 @@
 define i16 @test_urem_even(i16 %X) nounwind {
 ; CHECK-LABEL: test_urem_even:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #28087
-; CHECK-NEXT: mul w8, w0, w8
-; CHECK-NEXT: and w9, w8, #0xfffc
-; CHECK-NEXT: lsr w9, w9, #1
-; CHECK-NEXT: bfi w9, w8, #15, #17
-; CHECK-NEXT: ubfx w8, w9, #1, #15
+; CHECK-NEXT: neg w8, w0
+; CHECK-NEXT: ubfiz w8, w8, #14, #1
 ; CHECK-NEXT: cmp w8, #2340
 ; CHECK-NEXT: cset w0, hi
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/ARM/rev.ll b/llvm/test/CodeGen/ARM/rev.ll
--- a/llvm/test/CodeGen/ARM/rev.ll
+++ b/llvm/test/CodeGen/ARM/rev.ll
@@ -133,7 +133,8 @@
 define zeroext i16 @test9(i16 zeroext %v) nounwind readnone {
 ; CHECK-LABEL: test9:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: rev16 r0, r0
+; CHECK-NEXT: rev r0, r0
+; CHECK-NEXT: lsr r0, r0, #16
 ; CHECK-NEXT: bx lr
 entry:
   %conv = zext i16 %v to i32
diff --git a/llvm/test/CodeGen/ARM/sxt_rot.ll b/llvm/test/CodeGen/ARM/sxt_rot.ll
--- a/llvm/test/CodeGen/ARM/sxt_rot.ll
+++ b/llvm/test/CodeGen/ARM/sxt_rot.ll
@@ -75,7 +75,7 @@
 define signext i32 @test5(i32 %A, i32 %X) {
 ; CHECK-LABEL: test5:
 ; CHECK: @ %bb.0:
-; CHECK-NEXT: sxtah r0, r1, r0, ror #24
+; CHECK-NEXT: add r0, r1, r0, lsr #24
 ; CHECK-NEXT: bx lr
   %B = lshr i32 %A, 24
   %C = shl i32 %A, 8
diff --git a/llvm/test/CodeGen/ARM/uxt_rot.ll b/llvm/test/CodeGen/ARM/uxt_rot.ll
--- a/llvm/test/CodeGen/ARM/uxt_rot.ll
+++ b/llvm/test/CodeGen/ARM/uxt_rot.ll
@@ -125,7 +125,7 @@
 define zeroext i32 @test10(i32 %A, i32 %X) {
 ; CHECK-LABEL: test10:
 ; CHECK: @ %bb.0:
-; CHECK-NEXT: uxtah r0, r1, r0, ror #24
+; CHECK-NEXT: add r0, r1, r0, lsr #24
 ; CHECK-NEXT: bx lr
   %B = lshr i32 %A, 24
   %C = shl i32 %A, 8
@@ -192,7 +192,7 @@
 define zeroext i32 @test16(i32 %A, i32 %X) {
 ; CHECK-LABEL: test16:
 ; CHECK: @ %bb.0:
-; CHECK-NEXT: uxtah r0, r1, r0, ror #24
+; CHECK-NEXT: add r0, r1, r0, lsr #24
 ; CHECK-NEXT: bx lr
   %B = lshr i32 %A, 24
   %C = shl i32 %A, 8
diff --git a/llvm/test/CodeGen/ARM/uxtb.ll b/llvm/test/CodeGen/ARM/uxtb.ll
--- a/llvm/test/CodeGen/ARM/uxtb.ll
+++ b/llvm/test/CodeGen/ARM/uxtb.ll
@@ -78,7 +78,7 @@
 define i32 @test8(i32 %x) {
 ; CHECK-LABEL: test8:
 ; CHECK: @ %bb.0:
-; CHECK-NEXT: uxtb16 r0, r0, ror #24
+; CHECK-NEXT: lsr r0, r0, #24
 ; CHECK-NEXT: bx lr
   %tmp1 = shl i32 %x, 8
   %tmp2 = and i32 %tmp1, 16711680
@@ -90,7 +90,7 @@
 define i32 @test9(i32 %x) {
 ; CHECK-LABEL: test9:
 ; CHECK: @ %bb.0:
-; CHECK-NEXT: uxtb16 r0, r0, ror #24
+; CHECK-NEXT: lsr r0, r0, #24
 ; CHECK-NEXT: bx lr
   %tmp1 = lshr i32 %x, 24
   %tmp4 = shl i32 %x, 8
diff --git a/llvm/test/CodeGen/PowerPC/rlwinm2.ll b/llvm/test/CodeGen/PowerPC/rlwinm2.ll
--- a/llvm/test/CodeGen/PowerPC/rlwinm2.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwinm2.ll
@@ -20,7 +20,7 @@
 define i32 @test2(i32 %X) {
 ; CHECK-LABEL: test2:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: rlwinm 3, 3, 5, 25, 31
+; CHECK-NEXT: srwi 3, 3, 27
 ; CHECK-NEXT: blr
 entry:
   %tmp1 = lshr i32 %X, 27 ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/SystemZ/risbg-01.ll b/llvm/test/CodeGen/SystemZ/risbg-01.ll
--- a/llvm/test/CodeGen/SystemZ/risbg-01.ll
+++ b/llvm/test/CodeGen/SystemZ/risbg-01.ll
@@ -216,12 +216,12 @@
 ; CHECK-LABEL: f17:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: rll %r2, %r2, 4
-; CHECK-NEXT: nilf %r2, 126
+; CHECK-NEXT: nill %r2, 65410
 ; CHECK-NEXT: br %r14
   %parta = shl i32 %foo, 4
   %partb = lshr i32 %foo, 28
   %rotl = or i32 %parta, %partb
-  %and = and i32 %rotl, 126
+  %and = and i32 %rotl, -126
   ret i32 %and
 }
 
@@ -229,7 +229,7 @@
 define i64 @f18(i64 %foo) {
 ; CHECK-LABEL: f18:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: risbg %r2, %r2, 57, 190, 4
+; CHECK-NEXT: risbg %r2, %r2, 60, 190, 4
 ; CHECK-NEXT: br %r14
   %parta = shl i64 %foo, 4
   %partb = lshr i64 %foo, 60
diff --git a/llvm/test/CodeGen/SystemZ/risbg-04.ll b/llvm/test/CodeGen/SystemZ/risbg-04.ll
--- a/llvm/test/CodeGen/SystemZ/risbg-04.ll
+++ b/llvm/test/CodeGen/SystemZ/risbg-04.ll
@@ -204,12 +204,12 @@
 ; CHECK-LABEL: f17:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: rll %r2, %r2, 4
-; CHECK-NEXT: nilf %r2, 126
+; CHECK-NEXT: nill %r2, 65410
 ; CHECK-NEXT: br %r14
   %parta = shl i32 %foo, 4
   %partb = lshr i32 %foo, 28
   %rotl = or i32 %parta, %partb
-  %and = and i32 %rotl, 126
+  %and = and i32 %rotl, -126
   ret i32 %and
 }
 
@@ -217,7 +217,7 @@
 define i64 @f18(i64 %foo) {
 ; CHECK-LABEL: f18:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: risbg %r2, %r2, 57, 190, 4
+; CHECK-NEXT: risbg %r2, %r2, 60, 190, 4
 ; CHECK-NEXT: br %r14
   %parta = shl i64 %foo, 4
   %partb = lshr i64 %foo, 60
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-sxt_rot.ll b/llvm/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
--- a/llvm/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
@@ -85,16 +85,10 @@
 }
 
 define signext i32 @test5(i32 %A, i32 %X) {
-; CHECK-DSP-LABEL: test5:
-; CHECK-DSP: @ %bb.0:
-; CHECK-DSP-NEXT: sxtah r0, r1, r0, ror #24
-; CHECK-DSP-NEXT: bx lr
-;
-; CHECK-NO-DSP-LABEL: test5:
-; CHECK-NO-DSP: @ %bb.0:
-; CHECK-NO-DSP-NEXT: sxth.w r0, r0, ror #24
-; CHECK-NO-DSP-NEXT: add r0, r1
-; CHECK-NO-DSP-NEXT: bx lr
+; CHECK-LABEL: test5:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: add.w r0, r1, r0, lsr #24
+; CHECK-NEXT: bx lr
   %B = lshr i32 %A, 24
   %C = shl i32 %A, 8
   %D = or i32 %B, %C
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/llvm/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
--- a/llvm/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -100,17 +100,10 @@
 }
 
 define i32 @test7(i32 %A, i32 %X) {
-; CHECK-DSP-LABEL: test7:
-; CHECK-DSP: @ %bb.0:
-; CHECK-DSP-NEXT: uxtah r0, r0, r1, ror #24
-; CHECK-DSP-NEXT: bx lr
-;
-; CHECK-NO-DSP-LABEL: test7:
-; CHECK-NO-DSP: @ %bb.0:
-; CHECK-NO-DSP-NEXT: ror.w r1, r1, #24
-; CHECK-NO-DSP-NEXT: uxth r1, r1
-; CHECK-NO-DSP-NEXT: add r0, r1
-; CHECK-NO-DSP-NEXT: bx lr
+; CHECK-LABEL: test7:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: add.w r0, r0, r1, lsr #24
+; CHECK-NEXT: bx lr
   %lshr = lshr i32 %X, 24
   %shl = shl i32 %X, 8
   %or = or i32 %lshr, %shl
@@ -121,17 +114,10 @@
 }
 
 define i32 @test8(i32 %A, i32 %X) {
-; CHECK-DSP-LABEL: test8:
-; CHECK-DSP: @ %bb.0:
-; CHECK-DSP-NEXT: uxtah r0, r0, r1, ror #24
-; CHECK-DSP-NEXT: bx lr
-;
-; CHECK-NO-DSP-LABEL: test8:
-; CHECK-NO-DSP: @ %bb.0:
-; CHECK-NO-DSP-NEXT: ror.w r1, r1, #24
-; CHECK-NO-DSP-NEXT: uxth r1, r1
-; CHECK-NO-DSP-NEXT: add r0, r1
-; CHECK-NO-DSP-NEXT: bx lr
+; CHECK-LABEL: test8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: add.w r0, r0, r1, lsr #24
+; CHECK-NEXT: bx lr
   %lshr = lshr i32 %X, 24
   %shl = shl i32 %X, 8
   %or = or i32 %lshr, %shl
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll b/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll
--- a/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -125,13 +125,12 @@
 define i32 @test8(i32 %x) {
 ; CHECK-DSP-LABEL: test8:
 ; CHECK-DSP: @ %bb.0:
-; CHECK-DSP-NEXT: uxtb16 r0, r0, ror #24
+; CHECK-DSP-NEXT: lsrs r0, r0, #24
 ; CHECK-DSP-NEXT: bx lr
 ;
 ; CHECK-NO-DSP-LABEL: test8:
 ; CHECK-NO-DSP: @ %bb.0:
-; CHECK-NO-DSP-NEXT: mov.w r1, #16711935
-; CHECK-NO-DSP-NEXT: and.w r0, r1, r0, ror #24
+; CHECK-NO-DSP-NEXT: lsrs r0, r0, #24
 ; CHECK-NO-DSP-NEXT: bx lr
   %tmp1 = shl i32 %x, 8 ; <i32> [#uses=1]
   %tmp2 = and i32 %tmp1, 16711680 ; <i32> [#uses=1]
@@ -143,13 +142,12 @@
 define i32 @test9(i32 %x) {
 ; CHECK-DSP-LABEL: test9:
 ; CHECK-DSP: @ %bb.0:
-; CHECK-DSP-NEXT: uxtb16 r0, r0, ror #24
+; CHECK-DSP-NEXT: lsrs r0, r0, #24
 ; CHECK-DSP-NEXT: bx lr
 ;
 ; CHECK-NO-DSP-LABEL: test9:
 ; CHECK-NO-DSP: @ %bb.0:
-; CHECK-NO-DSP-NEXT: mov.w r1, #16711935
-; CHECK-NO-DSP-NEXT: and.w r0, r1, r0, ror #24
+; CHECK-NO-DSP-NEXT: lsrs r0, r0, #24
 ; CHECK-NO-DSP-NEXT: bx lr
   %tmp1 = lshr i32 %x, 24 ; <i32> [#uses=1]
   %tmp4 = shl i32 %x, 8 ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/rotate-extract-vector.ll b/llvm/test/CodeGen/X86/rotate-extract-vector.ll
--- a/llvm/test/CodeGen/X86/rotate-extract-vector.ll
+++ b/llvm/test/CodeGen/X86/rotate-extract-vector.ll
@@ -104,23 +104,12 @@
 }
 
 define <4 x i32> @vrolw_extract_mul_with_mask(<4 x i32> %i) nounwind {
-; X86-LABEL: vrolw_extract_mul_with_mask:
-; X86: # %bb.0:
-; X86-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
-; X86-NEXT: vpmulld %xmm1, %xmm0, %xmm0
-; X86-NEXT: vprold $7, %zmm0, %zmm0
-; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
-; X86-NEXT: vzeroupper
-; X86-NEXT: retl
-;
-; X64-LABEL: vrolw_extract_mul_with_mask:
-; X64: # %bb.0:
-; X64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
-; X64-NEXT: vpmulld %xmm1, %xmm0, %xmm0
-; X64-NEXT: vprold $7, %zmm0, %zmm0
-; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vzeroupper
-; X64-NEXT: retq
+; CHECK-LABEL: vrolw_extract_mul_with_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
+; CHECK-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpsrld $25, %xmm0, %xmm0
+; CHECK-NEXT: ret{{[l|q]}}
   %lhs_mul = mul <4 x i32> %i, <i32 1152, i32 1152, i32 1152, i32 1152>
   %rhs_mul = mul <4 x i32> %i, <i32 9, i32 9, i32 9, i32 9>
   %lhs_and = and <4 x i32> %lhs_mul,
diff --git a/llvm/test/CodeGen/X86/rotate-extract.ll b/llvm/test/CodeGen/X86/rotate-extract.ll
--- a/llvm/test/CodeGen/X86/rotate-extract.ll
+++ b/llvm/test/CodeGen/X86/rotate-extract.ll
@@ -109,17 +109,16 @@
 ; X86-NEXT: leal (%eax,%eax,8), %ecx
 ; X86-NEXT: movl $9, %eax
 ; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: shrdl $25, %eax, %edx
-; X86-NEXT: movzbl %dl, %eax
+; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: shrl $25, %ecx
+; X86-NEXT: movl %ecx, %eax
 ; X86-NEXT: xorl %edx, %edx
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: rolq_extract_mul_with_mask:
 ; X64: # %bb.0:
 ; X64-NEXT: leaq (%rdi,%rdi,8), %rax
-; X64-NEXT: rolq $7, %rax
-; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: shrq $57, %rax
 ; X64-NEXT: retq
   %lhs_mul = mul i64 %i, 1152
   %rhs_mul = mul i64 %i, 9
diff --git a/llvm/test/CodeGen/X86/rotate_vec.ll b/llvm/test/CodeGen/X86/rotate_vec.ll
--- a/llvm/test/CodeGen/X86/rotate_vec.ll
+++ b/llvm/test/CodeGen/X86/rotate_vec.ll
@@ -36,17 +36,11 @@
 }
 
 define <4 x i32> @rot_v4i32_splat_2masks(<4 x i32> %x) {
-; XOP-LABEL: rot_v4i32_splat_2masks:
-; XOP: # %bb.0:
-; XOP-NEXT: vprotd $31, %xmm0, %xmm0
-; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; XOP-NEXT: retq
-;
-; AVX512-LABEL: rot_v4i32_splat_2masks:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vprold $31, %xmm0, %xmm0
-; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: retq
+; CHECK-LABEL: rot_v4i32_splat_2masks:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
+; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: retq
   %1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %2 = and <4 x i32> %1,
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -2075,51 +2075,15 @@
 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: splatconstant_rotate_mask_v2i64:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vprolq $15, %zmm0, %zmm0
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: splatconstant_rotate_mask_v2i64:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vprolq $15, %xmm0, %xmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: splatconstant_rotate_mask_v2i64:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vprolq $15, %zmm0, %zmm0
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VLBW-LABEL: splatconstant_rotate_mask_v2i64:
-; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vprolq $15, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VLBW-NEXT: retq
-;
-; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v2i64:
-; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vprolq $15, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vzeroupper
-; AVX512VBMI2-NEXT: retq
-;
-; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v2i64:
-; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vprolq $15, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: retq
+; AVX512-LABEL: splatconstant_rotate_mask_v2i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrlq $49, %xmm0, %xmm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
 ;
 ; XOP-LABEL: splatconstant_rotate_mask_v2i64:
 ; XOP: # %bb.0:
-; XOP-NEXT: vprotq $15, %xmm0, %xmm0
+; XOP-NEXT: vpsrlq $49, %xmm0, %xmm0
 ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; XOP-NEXT: retq
 ;
@@ -2139,76 +2103,27 @@
 define <4 x i32> @splatconstant_rotate_mask_v4i32(<4 x i32> %a) nounwind {
 ; SSE-LABEL: splatconstant_rotate_mask_v4i32:
 ; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $28, %xmm1
-; SSE-NEXT: pslld $4, %xmm0
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: psrld $28, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: splatconstant_rotate_mask_v4i32:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpsrld $28, %xmm0, %xmm1
-; AVX-NEXT: vpslld $4, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpsrld $28, %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: splatconstant_rotate_mask_v4i32:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vprold $4, %zmm0, %zmm0
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: splatconstant_rotate_mask_v4i32:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vprold $4, %xmm0, %xmm0
-; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: splatconstant_rotate_mask_v4i32:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vprold $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VLBW-LABEL: splatconstant_rotate_mask_v4i32:
-; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vprold $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VLBW-NEXT: retq
-;
-; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v4i32:
-; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vprold $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vzeroupper
-; AVX512VBMI2-NEXT: retq
-;
-; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v4i32:
-; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vprold $4, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: retq
+; AVX512-LABEL: splatconstant_rotate_mask_v4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrld $28, %xmm0, %xmm0
+; AVX512-NEXT: retq
 ;
 ; XOP-LABEL: splatconstant_rotate_mask_v4i32:
 ; XOP: # %bb.0:
-; XOP-NEXT: vprotd $4, %xmm0, %xmm0
-; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: vpsrld $28, %xmm0, %xmm0
 ; XOP-NEXT: retq
 ;
 ; X86-SSE2-LABEL: splatconstant_rotate_mask_v4i32:
 ; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: psrld $28, %xmm1
-; X86-SSE2-NEXT: pslld $4, %xmm0
-; X86-SSE2-NEXT: por %xmm1, %xmm0
-; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: psrld $28, %xmm0
 ; X86-SSE2-NEXT: retl
   %shl = shl <4 x i32> %a, <i32 4, i32 4, i32 4, i32 4>
   %lshr = lshr <4 x i32> %a, <i32 28, i32 28, i32 28, i32 28>
@@ -2221,77 +2136,31 @@
 define <8 x i16> @splatconstant_rotate_mask_v8i16(<8 x i16> %a) nounwind {
 ; SSE-LABEL: splatconstant_rotate_mask_v8i16:
 ; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrlw $11, %xmm1
-; SSE-NEXT: psllw $5, %xmm0
-; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: psrlw $11, %xmm0
 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: splatconstant_rotate_mask_v8i16:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpsrlw $11, %xmm0, %xmm1
-; AVX-NEXT: vpsllw $5, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $11, %xmm0, %xmm0
 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: splatconstant_rotate_mask_v8i16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpsrlw $11, %xmm0, %xmm1
-; AVX512F-NEXT: vpsllw $5, %xmm0, %xmm0
-; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: splatconstant_rotate_mask_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsllw $5, %xmm0, %xmm1
-; AVX512VL-NEXT: vpsrlw $11, %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: splatconstant_rotate_mask_v8i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlw $11, %xmm0, %xmm1
-; AVX512BW-NEXT: vpsllw $5, %xmm0, %xmm0
-; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: retq
-;
-; AVX512VLBW-LABEL: splatconstant_rotate_mask_v8i16:
-; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsllw $5, %xmm0, %xmm1
-; AVX512VLBW-NEXT: vpsrlw $11, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
-; AVX512VLBW-NEXT: retq
-;
-; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v8i16:
-; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VBMI2-NEXT: vpshldw $5, %zmm0, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VBMI2-NEXT: vzeroupper
-; AVX512VBMI2-NEXT: retq
-;
-; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v8i16:
-; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpshldw $5, %xmm0, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: retq
+; AVX512-LABEL: splatconstant_rotate_mask_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrlw $11, %xmm0, %xmm0
+; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
 ;
 ; XOP-LABEL: splatconstant_rotate_mask_v8i16:
 ; XOP: # %bb.0:
-; XOP-NEXT: vprotw $5, %xmm0, %xmm0
+; XOP-NEXT: vpsrlw $11, %xmm0, %xmm0
 ; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; XOP-NEXT: retq
 ;
 ; X86-SSE2-LABEL: splatconstant_rotate_mask_v8i16:
 ; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: psrlw $11, %xmm1
-; X86-SSE2-NEXT: psllw $5, %xmm0
-; X86-SSE2-NEXT: por %xmm1, %xmm0
+; X86-SSE2-NEXT: psrlw $11, %xmm0
 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
 ; X86-SSE2-NEXT: retl
   %shl = shl <8 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -1801,58 +1801,52 @@
 ;
 ; AVX512F-LABEL: splatconstant_rotate_mask_v4i64:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vprolq $15, %zmm0, %zmm0
+; AVX512F-NEXT: vpsrlq $49, %ymm0, %ymm0
 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: splatconstant_rotate_mask_v4i64:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vprolq $15, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrlq $49, %ymm0, %ymm0
 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: splatconstant_rotate_mask_v4i64:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vprolq $15, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlq $49, %ymm0, %ymm0
 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_rotate_mask_v4i64:
 ; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vprolq $15, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrlq $49, %ymm0, %ymm0
 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512VLBW-NEXT: retq
 ;
 ; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v4i64:
 ; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vprolq $15, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpsrlq $49, %ymm0, %ymm0
 ; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512VBMI2-NEXT: retq
 ;
 ; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v4i64:
 ; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vprolq $15, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpsrlq $49, %ymm0, %ymm0
 ; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512VLVBMI2-NEXT: retq
 ;
 ; XOPAVX1-LABEL: splatconstant_rotate_mask_v4i64:
 ; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vprotq $15, %xmm0, %xmm1
+; XOPAVX1-NEXT: vpsrlq $49, %xmm0, %xmm1
 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; XOPAVX1-NEXT: vprotq $15, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpsrlq $49, %xmm0, %xmm0
 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; XOPAVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; XOPAVX1-NEXT: retq
 ;
 ; XOPAVX2-LABEL: splatconstant_rotate_mask_v4i64:
 ; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vprotq $15, %xmm0, %xmm1
-; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; XOPAVX2-NEXT: vprotq $15, %xmm0, %xmm0
-; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: vpsrlq $49, %ymm0, %ymm0
 ; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; XOPAVX2-NEXT: retq
   %shl = shl <4 x i64> %a, <i64 15, i64 15, i64 15, i64 15>
@@ -1866,79 +1860,67 @@
 define <8 x i32> @splatconstant_rotate_mask_v8i32(<8 x i32> %a) nounwind {
 ; AVX1-LABEL: splatconstant_rotate_mask_v8i32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsrld $28, %xmm1, %xmm2
-; AVX1-NEXT: vpslld $4, %xmm1, %xmm1
-; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrld $28, %xmm0, %xmm2
-; AVX1-NEXT: vpslld $4, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpsrld $28, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrld $28, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: splatconstant_rotate_mask_v8i32:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrld $28, %ymm0, %ymm1
-; AVX2-NEXT: vpslld $4, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrld $28, %ymm0, %ymm0
 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT: retq
 ;
 ; AVX512F-LABEL: splatconstant_rotate_mask_v8i32:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vprold $4, %zmm0, %zmm0
+; AVX512F-NEXT: vpsrld $28, %ymm0, %ymm0
 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: splatconstant_rotate_mask_v8i32:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vprold $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsrld $28, %ymm0, %ymm0
 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: splatconstant_rotate_mask_v8i32:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vprold $4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrld $28, %ymm0, %ymm0
 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_rotate_mask_v8i32:
 ; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vprold $4, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpsrld $28, %ymm0, %ymm0
 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512VLBW-NEXT: retq
 ;
 ; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v8i32:
 ; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vprold $4, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpsrld $28, %ymm0, %ymm0
 ; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512VBMI2-NEXT: retq
 ;
 ; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v8i32:
 ; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vprold $4, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpsrld $28, %ymm0, %ymm0
 ; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512VLVBMI2-NEXT: retq
 ;
 ; XOPAVX1-LABEL: splatconstant_rotate_mask_v8i32:
 ; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vprotd $4, %xmm0, %xmm1
+; XOPAVX1-NEXT: vpsrld $28, %xmm0, %xmm1
 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; XOPAVX1-NEXT: vprotd $4, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpsrld $28, %xmm0, %xmm0
 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; XOPAVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; XOPAVX1-NEXT: retq
 ;
 ; XOPAVX2-LABEL: splatconstant_rotate_mask_v8i32:
 ; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm1
-; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm0
-; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: vpsrld $28, %ymm0, %ymm0
 ; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; XOPAVX2-NEXT: retq
   %shl = shl <8 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
@@ -1952,83 +1934,67 @@
 define <16 x i16> @splatconstant_rotate_mask_v16i16(<16 x i16> %a) nounwind {
 ; AVX1-LABEL: splatconstant_rotate_mask_v16i16:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpsrlw $11, %xmm1, %xmm2
-; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
-; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $11, %xmm0, %xmm2
-; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpsrlw $11, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrlw $11, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: splatconstant_rotate_mask_v16i16:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vpsrlw $11, %ymm0, %ymm1
-; AVX2-NEXT: vpsllw $5, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlw $11, %ymm0, %ymm0
 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX2-NEXT: retq
 ;
 ; AVX512F-LABEL: splatconstant_rotate_mask_v16i16:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpsrlw $11, %ymm0, %ymm1
-; AVX512F-NEXT: vpsllw $5, %ymm0, %ymm0
-; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpsrlw $11, %ymm0, %ymm0
 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: splatconstant_rotate_mask_v16i16:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsllw $5, %ymm0, %ymm1
 ; AVX512VL-NEXT: vpsrlw $11, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: splatconstant_rotate_mask_v16i16:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlw $11, %ymm0, %ymm1
-; AVX512BW-NEXT: vpsllw $5, %ymm0, %ymm0
-; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpsrlw $11, %ymm0, %ymm0
 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_rotate_mask_v16i16:
 ; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsllw $5, %ymm0, %ymm1
 ; AVX512VLBW-NEXT: vpsrlw $11, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512VLBW-NEXT: retq
 ;
 ; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v16i16:
 ; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512VBMI2-NEXT: vpshldw $5, %zmm0, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpsrlw $11, %ymm0, %ymm0
 ; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512VBMI2-NEXT: retq
 ;
 ; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v16i16:
 ; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpshldw $5, %ymm0, %ymm0, %ymm0
+; AVX512VLVBMI2-NEXT: vpsrlw $11, %ymm0, %ymm0
 ; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; AVX512VLVBMI2-NEXT: retq
 ;
 ; XOPAVX1-LABEL: splatconstant_rotate_mask_v16i16:
 ; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vprotw $5, %xmm0, %xmm1
+; XOPAVX1-NEXT: vpsrlw $11, %xmm0, %xmm1
 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; XOPAVX1-NEXT: vprotw $5, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpsrlw $11, %xmm0, %xmm0
 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; XOPAVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; XOPAVX1-NEXT: retq
 ;
 ; XOPAVX2-LABEL: splatconstant_rotate_mask_v16i16:
 ; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vprotw $5, %xmm0, %xmm1
-; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
-; XOPAVX2-NEXT: vprotw $5, %xmm0, %xmm0
-; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: vpsrlw $11, %ymm0, %ymm0
 ; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; XOPAVX2-NEXT: retq
   %shl = shl <16 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -1043,7 +1043,7 @@
 define <8 x i64> @splatconstant_rotate_mask_v8i64(<8 x i64> %a) nounwind {
 ; AVX512-LABEL: splatconstant_rotate_mask_v8i64:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vprolq $15, %zmm0, %zmm0
+; AVX512-NEXT: vpsrlq $49, %zmm0, %zmm0
 ; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512-NEXT: retq
   %shl = shl <8 x i64> %a, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
@@ -1057,7 +1057,7 @@
 define <16 x i32> @splatconstant_rotate_mask_v16i32(<16 x i32> %a) nounwind {
 ; AVX512-LABEL: splatconstant_rotate_mask_v16i32:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vprold $4, %zmm0, %zmm0
+; AVX512-NEXT: vpsrld $28, %zmm0, %zmm0
 ; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512-NEXT: retq
   %shl = shl <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
@@ -1071,51 +1071,43 @@
 define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind {
 ; AVX512F-LABEL: splatconstant_rotate_mask_v32i16:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpsllw $5, %ymm0, %ymm1
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512F-NEXT: vpsllw $5, %ymm2, %ymm3
-; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
+; AVX512F-NEXT: vpsrlw $11, %ymm0, %ymm1
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
 ; AVX512F-NEXT: vpsrlw $11, %ymm0, %ymm0
-; AVX512F-NEXT: vpsrlw $11, %ymm2, %ymm2
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: splatconstant_rotate_mask_v32i16:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsllw $5, %ymm0, %ymm1
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512VL-NEXT: vpsllw $5, %ymm2, %ymm3
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
+; AVX512VL-NEXT: vpsrlw $11, %ymm0, %ymm1
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
 ; AVX512VL-NEXT: vpsrlw $11, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsrlw $11, %ymm2, %ymm2
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: splatconstant_rotate_mask_v32i16:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsllw $5, %zmm0, %zmm1
 ; AVX512BW-NEXT: vpsrlw $11, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_rotate_mask_v32i16:
 ; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsllw $5, %zmm0, %zmm1
 ; AVX512VLBW-NEXT: vpsrlw $11, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512VLBW-NEXT: retq
 ;
 ; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v32i16:
 ; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpshldw $5, %zmm0, %zmm0, %zmm0
+; AVX512VBMI2-NEXT: vpsrlw $11, %zmm0, %zmm0
 ; AVX512VBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512VBMI2-NEXT: retq
 ;
 ; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v32i16:
 ; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpshldw $5, %zmm0, %zmm0, %zmm0
+; AVX512VLVBMI2-NEXT: vpsrlw $11, %zmm0, %zmm0
 ; AVX512VLVBMI2-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
 ; AVX512VLVBMI2-NEXT: retq
   %shl = shl <32 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
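
Appendix (editorial illustration, not part of the patch): the TargetLowering.cpp hunk rests on the observation that result bit i of rotl(x, Amt) is source bit (i - Amt) mod BW, so the source bits that can feed a demanded mask D are exactly rotr(D, Amt) for rotl (and rotr(D, RevAmt) for rotr), which is what the Demanded0 computation encodes; once all demanded bits are fed by a single half of the rotate, that half's plain shift suffices. The C++ sketch below brute-force checks the Demanded0 property for every 8-bit rotate amount and mask. rotl8/rotr8 and the driver are local stand-ins written for this note, not LLVM APIs.

#include <cstdint>
#include <cstdio>

// Plain-integer stand-ins for APInt's rotate operations.
static uint8_t rotl8(uint8_t X, unsigned Amt) {
  Amt %= 8;
  return Amt ? (uint8_t)((X << Amt) | (X >> (8 - Amt))) : X;
}
static uint8_t rotr8(uint8_t X, unsigned Amt) {
  Amt %= 8;
  return Amt ? (uint8_t)((X >> Amt) | (X << (8 - Amt))) : X;
}

int main() {
  for (unsigned Amt = 0; Amt < 8; ++Amt)
    for (unsigned D = 0; D < 256; ++D) {          // demanded bits of the rotl
      uint8_t Demanded0 = rotr8((uint8_t)D, Amt); // claimed demanded bits of Op0
      for (unsigned X = 0; X < 256; ++X)
        for (unsigned B = 0; B < 8; ++B) {
          if (Demanded0 & (1u << B))
            continue; // only perturb bits claimed to be non-demanded
          // Flipping a non-demanded source bit must not change any
          // demanded bit of the rotated result.
          uint8_t Y = (uint8_t)(X ^ (1u << B));
          if ((rotl8((uint8_t)X, Amt) ^ rotl8(Y, Amt)) & D) {
            std::printf("counterexample: Amt=%u D=0x%02x bit=%u\n", Amt, D, B);
            return 1;
          }
        }
    }
  std::printf("OK: rotr(D, Amt) covers every source bit rotl can leak into D\n");
  return 0;
}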