diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29735,8 +29735,22 @@ uint64_t ShiftAmt = APIntShiftAmt.getZExtValue(); - if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) + if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) { + // Hardware support for vector shifts is sparse which makes us scalarize the + // vector operations in many cases. Also, on sandybridge ADD is faster than + // shl: (shl V, 1) -> (add (freeze V), (freeze V)) + if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) { + // R may be undef at run-time, but (shl R, 1) must be an even number (LSB + // must be 0). (add undef, undef) however can be any value. To make this + // safe, we must freeze R to ensure that register allocation uses the same + // register for an undefined value. This ensures that the result will + // still be even and preserves the original semantics. + R = DAG.getFreeze(R); + return DAG.getNode(ISD::ADD, dl, VT, R, R); + } + return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); + } // i64 SRA needs to be performed as partial shifts. if (((!Subtarget.hasXOP() && VT == MVT::v2i64) || @@ -46674,20 +46688,6 @@ } } - // Hardware support for vector shifts is sparse which makes us scalarize the - // vector operations in many cases. Also, on sandybridge ADD is faster than - // shl. - // (shl V, 1) -> add V,V - if (auto *N1BV = dyn_cast(N1)) - if (auto *N1SplatC = N1BV->getConstantSplatNode()) { - assert(N0.getValueType().isVector() && "Invalid vector shift type"); - // We shift all of the values by one. In many cases we do not have - // hardware support for this operation. This is better expressed as an ADD - // of two values. 
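[Editorial illustration, not part of the patch] A minimal hypothetical test sketch in the style of the .ll tests updated below — the function name, RUN line, and CHECK line are assumptions, not taken from this change. It shows the pattern the relocated fold targets: with the lowering code added in the first hunk above, a splat vector shift-left-by-one should now select paddq instead of psllq $1, consistent with the freeze-binary.ll and combine-mul.ll updates below.

; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s
define <2 x i64> @shl_splat_by_one(<2 x i64> %v) {
  ; Lowered as (add (freeze %v), (freeze %v)) by the X86 lowering added above.
  %r = shl <2 x i64> %v, <i64 1, i64 1>
  ret <2 x i64> %r
}
; CHECK: paddq %xmm0, %xmm0
; (before this patch: psllq $1, %xmm0)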
- if (N1SplatC->isOne()) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0); - } - return SDValue(); } diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll --- a/llvm/test/CodeGen/X86/combine-mul.ll +++ b/llvm/test/CodeGen/X86/combine-mul.ll @@ -80,13 +80,13 @@ define <4 x i64> @combine_vec_mul_pow2c(<4 x i64> %x) { ; SSE-LABEL: combine_vec_mul_pow2c: ; SSE: # %bb.0: -; SSE-NEXT: movdqa %xmm0, %xmm2 -; SSE-NEXT: psllq $1, %xmm2 -; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; SSE-NEXT: movdqa %xmm1, %xmm2 ; SSE-NEXT: psllq $4, %xmm2 ; SSE-NEXT: psllq $2, %xmm1 ; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] +; SSE-NEXT: movdqa %xmm0, %xmm2 +; SSE-NEXT: paddq %xmm0, %xmm2 +; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_mul_pow2c: diff --git a/llvm/test/CodeGen/X86/freeze-binary.ll b/llvm/test/CodeGen/X86/freeze-binary.ll --- a/llvm/test/CodeGen/X86/freeze-binary.ll +++ b/llvm/test/CodeGen/X86/freeze-binary.ll @@ -398,7 +398,7 @@ define <2 x i64> @freeze_shl_vec_outofrange(<2 x i64> %a0) nounwind { ; X86-LABEL: freeze_shl_vec_outofrange: ; X86: # %bb.0: -; X86-NEXT: psllq $1, %xmm0 +; X86-NEXT: paddq %xmm0, %xmm0 ; X86-NEXT: psllq $2, %xmm0 ; X86-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll --- a/llvm/test/CodeGen/X86/oddsubvector.ll +++ b/llvm/test/CodeGen/X86/oddsubvector.ll @@ -157,71 +157,71 @@ ; SSE2-LABEL: PR42833: ; SSE2: # %bb.0: ; SSE2-NEXT: movl b(%rip), %eax -; SSE2-NEXT: movdqa c+144(%rip), %xmm0 -; SSE2-NEXT: movdqa c+128(%rip), %xmm1 +; SSE2-NEXT: movdqa c+128(%rip), %xmm0 +; SSE2-NEXT: movdqa c+144(%rip), %xmm1 ; SSE2-NEXT: addl c+128(%rip), %eax ; SSE2-NEXT: movd %eax, %xmm2 ; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: paddd %xmm1, %xmm3 +; SSE2-NEXT: paddd %xmm0, %xmm3 ; SSE2-NEXT: movdqa d+144(%rip), %xmm4 -; SSE2-NEXT: psubd %xmm0, %xmm4 -; SSE2-NEXT: paddd %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm1, %xmm5 -; SSE2-NEXT: paddd %xmm1, %xmm5 +; SSE2-NEXT: psubd %xmm1, %xmm4 +; SSE2-NEXT: paddd %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm5 +; SSE2-NEXT: paddd %xmm0, %xmm5 ; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm3[0],xmm5[1,2,3] -; SSE2-NEXT: movdqa %xmm0, c+144(%rip) +; SSE2-NEXT: movdqa %xmm1, c+144(%rip) ; SSE2-NEXT: movaps %xmm5, c+128(%rip) -; SSE2-NEXT: movdqa c+160(%rip), %xmm0 +; SSE2-NEXT: movdqa c+160(%rip), %xmm1 ; SSE2-NEXT: movdqa c+176(%rip), %xmm3 ; SSE2-NEXT: movdqa d+160(%rip), %xmm5 ; SSE2-NEXT: movdqa d+176(%rip), %xmm6 ; SSE2-NEXT: movdqa d+128(%rip), %xmm7 -; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] -; SSE2-NEXT: psubd %xmm1, %xmm7 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] +; SSE2-NEXT: psubd %xmm0, %xmm7 ; SSE2-NEXT: psubd %xmm3, %xmm6 -; SSE2-NEXT: psubd %xmm0, %xmm5 +; SSE2-NEXT: psubd %xmm1, %xmm5 ; SSE2-NEXT: movdqa %xmm5, d+160(%rip) ; SSE2-NEXT: movdqa %xmm6, d+176(%rip) ; SSE2-NEXT: movdqa %xmm4, d+144(%rip) ; SSE2-NEXT: movdqa %xmm7, d+128(%rip) ; SSE2-NEXT: paddd %xmm3, %xmm3 -; SSE2-NEXT: paddd %xmm0, %xmm0 -; SSE2-NEXT: movdqa %xmm0, c+160(%rip) +; SSE2-NEXT: paddd %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm1, c+160(%rip) ; SSE2-NEXT: movdqa %xmm3, c+176(%rip) ; SSE2-NEXT: retq ; ; SSE42-LABEL: PR42833: ; SSE42: # %bb.0: ; SSE42-NEXT: movl b(%rip), %eax -; SSE42-NEXT: movdqa c+144(%rip), %xmm0 -; SSE42-NEXT: movdqa c+128(%rip), %xmm1 +; SSE42-NEXT: movdqa c+128(%rip), %xmm0 +; SSE42-NEXT: movdqa c+144(%rip), %xmm1 ; SSE42-NEXT: addl 
c+128(%rip), %eax ; SSE42-NEXT: movd %eax, %xmm2 -; SSE42-NEXT: paddd %xmm1, %xmm2 +; SSE42-NEXT: paddd %xmm0, %xmm2 ; SSE42-NEXT: movdqa d+144(%rip), %xmm3 -; SSE42-NEXT: psubd %xmm0, %xmm3 -; SSE42-NEXT: paddd %xmm0, %xmm0 -; SSE42-NEXT: movdqa %xmm1, %xmm4 -; SSE42-NEXT: paddd %xmm1, %xmm4 +; SSE42-NEXT: psubd %xmm1, %xmm3 +; SSE42-NEXT: paddd %xmm1, %xmm1 +; SSE42-NEXT: movdqa %xmm0, %xmm4 +; SSE42-NEXT: paddd %xmm0, %xmm4 ; SSE42-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1],xmm4[2,3,4,5,6,7] -; SSE42-NEXT: movdqa %xmm0, c+144(%rip) +; SSE42-NEXT: movdqa %xmm1, c+144(%rip) ; SSE42-NEXT: movdqa %xmm4, c+128(%rip) -; SSE42-NEXT: movdqa c+160(%rip), %xmm0 +; SSE42-NEXT: movdqa c+160(%rip), %xmm1 ; SSE42-NEXT: movdqa c+176(%rip), %xmm2 ; SSE42-NEXT: movdqa d+160(%rip), %xmm4 ; SSE42-NEXT: movdqa d+176(%rip), %xmm5 ; SSE42-NEXT: movdqa d+128(%rip), %xmm6 -; SSE42-NEXT: pinsrd $0, %eax, %xmm1 -; SSE42-NEXT: psubd %xmm1, %xmm6 +; SSE42-NEXT: pinsrd $0, %eax, %xmm0 +; SSE42-NEXT: psubd %xmm0, %xmm6 ; SSE42-NEXT: psubd %xmm2, %xmm5 -; SSE42-NEXT: psubd %xmm0, %xmm4 +; SSE42-NEXT: psubd %xmm1, %xmm4 ; SSE42-NEXT: movdqa %xmm4, d+160(%rip) ; SSE42-NEXT: movdqa %xmm5, d+176(%rip) ; SSE42-NEXT: movdqa %xmm3, d+144(%rip) ; SSE42-NEXT: movdqa %xmm6, d+128(%rip) ; SSE42-NEXT: paddd %xmm2, %xmm2 -; SSE42-NEXT: paddd %xmm0, %xmm0 -; SSE42-NEXT: movdqa %xmm0, c+160(%rip) +; SSE42-NEXT: paddd %xmm1, %xmm1 +; SSE42-NEXT: movdqa %xmm1, c+160(%rip) ; SSE42-NEXT: movdqa %xmm2, c+176(%rip) ; SSE42-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/rotate_vec.ll b/llvm/test/CodeGen/X86/rotate_vec.ll --- a/llvm/test/CodeGen/X86/rotate_vec.ll +++ b/llvm/test/CodeGen/X86/rotate_vec.ll @@ -111,21 +111,18 @@ ; XOPAVX1-LABEL: rot_v4i32_mask_ashr0: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vpshad {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: rot_v4i32_mask_ashr0: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; XOPAVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOPAVX2-NEXT: retq ; ; AVX512-LABEL: rot_v4i32_mask_ashr0: ; AVX512: # %bb.0: ; AVX512-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = ashr <4 x i32> %a0, @@ -139,7 +136,6 @@ ; XOPAVX1-LABEL: rot_v4i32_mask_ashr1: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vpsrad $25, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOPAVX1-NEXT: retq @@ -147,7 +143,6 @@ ; XOPAVX2-LABEL: rot_v4i32_mask_ashr1: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpsrad $25, %xmm0, %xmm0 -; XOPAVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpbroadcastd %xmm0, %xmm0 ; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOPAVX2-NEXT: retq @@ -155,7 +150,6 @@ ; AVX512-LABEL: rot_v4i32_mask_ashr1: ; AVX512: # %bb.0: ; AVX512-NEXT: vpsrad $25, %xmm0, %xmm0 -; AVX512-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0 ; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll --- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll +++ 
b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll @@ -581,28 +581,33 @@ ; X64-NEXT: subq $104, %rsp ; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: pxor %xmm2, %xmm2 -; X64-NEXT: pcmpgtd %xmm0, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3] +; X64-NEXT: psllq $32, %xmm3 +; X64-NEXT: movdqa %xmm3, %xmm2 +; X64-NEXT: psrad $31, %xmm2 +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; X64-NEXT: psrlq $31, %xmm3 +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3] ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X64-NEXT: paddq %xmm0, %xmm0 ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: movq %xmm0, %rbx -; X64-NEXT: movq %rbx, %rbp +; X64-NEXT: movq %xmm0, %r15 +; X64-NEXT: movq %r15, %rbp ; X64-NEXT: sarq $63, %rbp -; X64-NEXT: shldq $31, %rbx, %rbp +; X64-NEXT: shldq $31, %r15, %rbp +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] ; X64-NEXT: pxor %xmm0, %xmm0 ; X64-NEXT: pcmpgtd %xmm1, %xmm0 ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: movq %xmm1, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rdx, %r15 -; X64-NEXT: sarq $63, %r15 -; X64-NEXT: movq %rbx, %r12 +; X64-NEXT: movq %rdx, %rbx +; X64-NEXT: sarq $63, %rbx +; X64-NEXT: movq %r15, %r12 ; X64-NEXT: shlq $31, %r12 ; X64-NEXT: movq %r12, %rdi ; X64-NEXT: movq %rbp, %rsi -; X64-NEXT: movq %r15, %rcx +; X64-NEXT: movq %rbx, %rcx ; X64-NEXT: callq __divti3@PLT ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -610,16 +615,16 @@ ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: subq $1, %r13 ; X64-NEXT: sbbq $0, %r14 -; X64-NEXT: shrq $63, %rbx -; X64-NEXT: xorl %r15d, %ebx +; X64-NEXT: shrq $63, %r15 +; X64-NEXT: xorl %ebx, %r15d ; X64-NEXT: movq %r12, %rdi ; X64-NEXT: movq %rbp, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: movq %r15, %rcx +; X64-NEXT: movq %rbx, %rcx ; X64-NEXT: callq __modti3@PLT ; X64-NEXT: orq %rax, %rdx ; X64-NEXT: setne %al -; X64-NEXT: testb %bl, %al +; X64-NEXT: testb %r15b, %al ; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload ; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload ; X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF @@ -699,43 +704,45 @@ ; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; X64-NEXT: psrlq $1, %xmm1 ; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload -; X64-NEXT: # xmm1 = mem[2,3,2,3] -; X64-NEXT: pxor %xmm0, %xmm0 -; X64-NEXT: pcmpgtd %xmm1, %xmm0 -; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X64-NEXT: paddq %xmm1, %xmm1 -; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: movq %xmm1, %rbx -; X64-NEXT: movq %rbx, %r12 -; X64-NEXT: sarq $63, %r12 -; X64-NEXT: shldq $31, %rbx, %r12 -; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload -; X64-NEXT: # xmm1 = mem[2,3,2,3] -; X64-NEXT: pxor %xmm0, %xmm0 -; X64-NEXT: pcmpgtd %xmm1, %xmm0 -; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; X64-NEXT: movq %xmm1, %rdx +; X64-NEXT: pshufd $212, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 
16-byte Folded Reload +; X64-NEXT: # xmm0 = mem[0,1,1,3] +; X64-NEXT: psllq $32, %xmm0 +; X64-NEXT: movdqa %xmm0, %xmm1 +; X64-NEXT: psrad $31, %xmm1 +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] +; X64-NEXT: psrlq $31, %xmm0 +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movq %xmm0, %rbx +; X64-NEXT: movq %rbx, %r13 +; X64-NEXT: sarq $63, %r13 +; X64-NEXT: shldq $31, %rbx, %r13 +; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: pxor %xmm1, %xmm1 +; X64-NEXT: pcmpgtd %xmm0, %xmm1 +; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movq %xmm0, %rdx ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: sarq $63, %rbp ; X64-NEXT: movq %rbx, %r15 ; X64-NEXT: shlq $31, %r15 ; X64-NEXT: movq %r15, %rdi -; X64-NEXT: movq %r12, %rsi +; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq %rbp, %rcx ; X64-NEXT: callq __divti3@PLT -; X64-NEXT: movq %rax, %r13 +; X64-NEXT: movq %rax, %r12 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: subq $1, %r13 +; X64-NEXT: subq $1, %r12 ; X64-NEXT: sbbq $0, %r14 ; X64-NEXT: shrq $63, %rbx ; X64-NEXT: xorl %ebp, %ebx ; X64-NEXT: movq %r15, %rdi -; X64-NEXT: movq %r12, %rsi +; X64-NEXT: movq %r13, %rsi ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; X64-NEXT: movq %rbp, %rcx ; X64-NEXT: callq __modti3@PLT @@ -743,25 +750,25 @@ ; X64-NEXT: setne %al ; X64-NEXT: testb %bl, %al ; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload ; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF -; X64-NEXT: cmpq %rcx, %r13 +; X64-NEXT: cmpq %rcx, %r12 ; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF -; X64-NEXT: cmovbq %r13, %rax +; X64-NEXT: cmovbq %r12, %rax ; X64-NEXT: testq %r14, %r14 -; X64-NEXT: cmovnsq %rcx, %r13 -; X64-NEXT: cmoveq %rax, %r13 +; X64-NEXT: cmovnsq %rcx, %r12 +; X64-NEXT: cmoveq %rax, %r12 ; X64-NEXT: movl $0, %eax ; X64-NEXT: cmovnsq %rax, %r14 ; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; X64-NEXT: cmpq %rcx, %r13 +; X64-NEXT: cmpq %rcx, %r12 ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: cmovaq %r13, %rax +; X64-NEXT: cmovaq %r12, %rax ; X64-NEXT: testq %r14, %r14 -; X64-NEXT: cmovsq %rcx, %r13 +; X64-NEXT: cmovsq %rcx, %r12 ; X64-NEXT: cmpq $-1, %r14 -; X64-NEXT: cmoveq %rax, %r13 -; X64-NEXT: movq %r13, %xmm0 +; X64-NEXT: cmoveq %rax, %r12 +; X64-NEXT: movq %r12, %xmm0 ; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; X64-NEXT: # xmm0 = mem[2,3,2,3] @@ -816,12 +823,12 @@ ; X64-NEXT: cmovsq %rcx, %r12 ; X64-NEXT: cmpq $-1, %r14 ; X64-NEXT: cmoveq %rax, %r12 -; X64-NEXT: movq %r12, %xmm0 -; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; X64-NEXT: psrlq $1, %xmm1 -; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; X64-NEXT: movq %r12, %xmm1 +; X64-NEXT: movdqa 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: psrlq $1, %xmm0 +; X64-NEXT: shufps $136, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; X64-NEXT: # xmm0 = xmm0[0,2],mem[0,2] ; X64-NEXT: addq $104, %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r12 @@ -840,116 +847,108 @@ ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $256, %esp # imm = 0x100 -; X86-NEXT: movl 24(%ebp), %edx -; X86-NEXT: movl 40(%ebp), %edi -; X86-NEXT: leal {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %edi, %esi -; X86-NEXT: sarl $31, %esi -; X86-NEXT: movl %edx, %eax -; X86-NEXT: sarl $31, %eax -; X86-NEXT: addl %edx, %edx -; X86-NEXT: adcl %eax, %eax +; X86-NEXT: movl 16(%ebp), %edi +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: movl %eax, %esi ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shldl $31, %edx, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shll $31, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: andl $1, %eax -; X86-NEXT: negl %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sarl $31, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: sarl $31, %ebx +; X86-NEXT: leal (%edi,%edi), %eax +; X86-NEXT: shrl $31, %edi +; X86-NEXT: shldl $31, %eax, %edi +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %edx +; X86-NEXT: calll __divti3 +; X86-NEXT: addl $32, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %esi +; X86-NEXT: pushl 32(%ebp) +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl $0 +; X86-NEXT: pushl %eax ; X86-NEXT: calll __modti3 ; X86-NEXT: addl $32, %esp -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl 36(%ebp), %esi -; X86-NEXT: movl %esi, %edi -; X86-NEXT: sarl $31, %edi -; X86-NEXT: movl 20(%ebp), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: adcl %edx, %edx +; X86-NEXT: movl 36(%ebp), %edx ; X86-NEXT: movl %edx, %ebx -; X86-NEXT: shldl $31, %ecx, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shll $31, %ecx +; X86-NEXT: sarl $31, %ebx +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: sarl $31, %esi +; X86-NEXT: leal (%ecx,%ecx), %eax +; X86-NEXT: shrl $31, %ecx +; X86-NEXT: shldl $31, %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: andl $1, %edx -; X86-NEXT: negl %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edx +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %esi +; X86-NEXT: pushl %esi ; X86-NEXT: pushl %ecx +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: 
calll __modti3 ; X86-NEXT: addl $32, %esp -; X86-NEXT: leal {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl 28(%ebp), %ebx -; X86-NEXT: movl %ebx, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: movl 12(%ebp), %eax -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl 28(%ebp), %edx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: sarl $31, %esi -; X86-NEXT: addl %eax, %eax -; X86-NEXT: adcl %esi, %esi -; X86-NEXT: movl %esi, %ecx +; X86-NEXT: leal (%ecx,%ecx), %eax +; X86-NEXT: shrl $31, %ecx ; X86-NEXT: shldl $31, %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shll $31, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: andl $1, %esi -; X86-NEXT: negl %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edx +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edx -; X86-NEXT: pushl %ebx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %ecx +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax -; X86-NEXT: pushl %edi ; X86-NEXT: calll __divti3 ; X86-NEXT: addl $32, %esp -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl 32(%ebp), %edx +; X86-NEXT: movl 40(%ebp), %edx ; X86-NEXT: movl %edx, %esi ; X86-NEXT: sarl $31, %esi -; X86-NEXT: movl 16(%ebp), %ecx -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: adcl %ebx, %ebx -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: shldl $31, %ecx, %edi -; X86-NEXT: shll $31, %ecx +; X86-NEXT: movl 24(%ebp), %ecx +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: leal (%ecx,%ecx), %eax +; X86-NEXT: shrl $31, %ecx +; X86-NEXT: shldl $31, %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: andl $1, %ebx -; X86-NEXT: negl %ebx +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %edx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %ecx +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __modti3 ; X86-NEXT: addl $32, %esp @@ -958,39 +957,25 @@ ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl 32(%ebp) -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl %eax -; X86-NEXT: calll __divti3 -; X86-NEXT: addl $32, %esp -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx ; X86-NEXT: pushl 40(%ebp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __divti3 ; X86-NEXT: addl $32, %esp ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax 
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl 36(%ebp) ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %edi ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll __divti3 ; X86-NEXT: addl $32, %esp @@ -1005,22 +990,22 @@ ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: sets %bl ; X86-NEXT: testl %edi, %edi -; X86-NEXT: sets %al -; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: sets %ah -; X86-NEXT: xorb %al, %ah +; X86-NEXT: sets %bh +; X86-NEXT: xorb %bl, %bh ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: orl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl %edi, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl %edi, %eax ; X86-NEXT: setne %al -; X86-NEXT: testb %ah, %al +; X86-NEXT: testb %bh, %al ; X86-NEXT: cmovel %esi, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -1030,7 +1015,7 @@ ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: cmovel %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -1047,7 +1032,7 @@ ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: sets %bl ; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: sets %bh @@ -1085,11 +1070,11 @@ ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: sets %al ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: testl %edx, %edx +; X86-NEXT: sets %al +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: testl %ecx, %ecx ; X86-NEXT: sets %bl ; X86-NEXT: xorb %al, %bl ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax @@ -1100,7 +1085,7 @@ ; X86-NEXT: pushl %ecx ; X86-NEXT: pushl %ecx ; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax ; X86-NEXT: calll 
__modti3 ; X86-NEXT: addl $32, %esp diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll --- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll @@ -382,93 +382,85 @@ ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: subl $8, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: addl %eax, %eax -; X86-NEXT: setb %cl -; X86-NEXT: shldl $31, %eax, %ecx -; X86-NEXT: shll $31, %eax +; X86-NEXT: leal (%eax,%eax), %ecx +; X86-NEXT: shrl $31, %eax +; X86-NEXT: shldl $31, %ecx, %eax ; X86-NEXT: pushl $0 ; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %ecx ; X86-NEXT: pushl %eax +; X86-NEXT: pushl $0 ; X86-NEXT: calll __udivdi3 ; X86-NEXT: addl $16, %esp ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: addl %ebp, %ebp -; X86-NEXT: setb %al -; X86-NEXT: shldl $31, %ebp, %eax -; X86-NEXT: shll $31, %ebp +; X86-NEXT: movl %edx, %edi +; X86-NEXT: leal (%ebx,%ebx), %eax +; X86-NEXT: shrl $31, %ebx +; X86-NEXT: shldl $31, %eax, %ebx ; X86-NEXT: pushl $0 -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %eax ; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl $0 ; X86-NEXT: calll __udivdi3 ; X86-NEXT: addl $16, %esp ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: movl %edx, %ebp -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: addl %edi, %edi -; X86-NEXT: setb %al -; X86-NEXT: shldl $31, %edi, %eax -; X86-NEXT: shll $31, %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: leal (%esi,%esi), %eax +; X86-NEXT: shrl $31, %esi +; X86-NEXT: shldl $31, %eax, %esi ; X86-NEXT: pushl $0 ; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: pushl $0 ; X86-NEXT: calll __udivdi3 ; X86-NEXT: addl $16, %esp -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: addl %esi, %esi -; X86-NEXT: setb %al -; X86-NEXT: shldl $31, %esi, %eax -; X86-NEXT: shll $31, %esi +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: leal (%edx,%edx), %ecx +; X86-NEXT: shrl $31, %edx +; X86-NEXT: shldl $31, %ecx, %edx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: cmpl $2, %esi +; X86-NEXT: movl $-1, %edx +; X86-NEXT: cmovael %edx, %eax +; X86-NEXT: cmpl $1, %esi +; X86-NEXT: movl $1, %ebp +; X86-NEXT: cmovael %ebp, %esi +; X86-NEXT: shldl $31, %eax, %esi +; X86-NEXT: cmpl $2, %ebx +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: cmovael %edx, %eax +; X86-NEXT: cmpl $1, %ebx +; X86-NEXT: cmovael %ebp, %ebx +; X86-NEXT: shldl $31, %eax, %ebx +; X86-NEXT: cmpl $2, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: cmovael %edx, %eax +; X86-NEXT: cmpl $1, %edi +; X86-NEXT: cmovael %ebp, %edi +; X86-NEXT: shldl $31, %eax, %edi ; X86-NEXT: pushl $0 ; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl $0 ; X86-NEXT: calll __udivdi3 ; X86-NEXT: addl $16, %esp ; X86-NEXT: cmpl $2, %edx -; X86-NEXT: movl $-1, %esi -; X86-NEXT: cmovael %esi, %eax +; X86-NEXT: movl $-1, %ecx +; X86-NEXT: cmovael %ecx, %eax ; X86-NEXT: 
cmpl $1, %edx -; X86-NEXT: movl $1, %ecx -; X86-NEXT: cmovael %ecx, %edx -; X86-NEXT: shldl $31, %eax, %edx -; X86-NEXT: cmpl $2, %edi -; X86-NEXT: cmovael %esi, %ebx -; X86-NEXT: cmpl $1, %edi -; X86-NEXT: cmovael %ecx, %edi -; X86-NEXT: shldl $31, %ebx, %edi -; X86-NEXT: cmpl $2, %ebp -; X86-NEXT: movl (%esp), %eax # 4-byte Reload -; X86-NEXT: cmovael %esi, %eax -; X86-NEXT: cmpl $1, %ebp -; X86-NEXT: cmovael %ecx, %ebp +; X86-NEXT: cmovbl %edx, %ebp ; X86-NEXT: shldl $31, %eax, %ebp -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: cmpl $2, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovael %esi, %eax -; X86-NEXT: cmpl $1, %ebx -; X86-NEXT: cmovbl %ebx, %ecx -; X86-NEXT: shldl $31, %eax, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ecx, 12(%eax) -; X86-NEXT: movl %ebp, 8(%eax) -; X86-NEXT: movl %edi, 4(%eax) -; X86-NEXT: movl %edx, (%eax) -; X86-NEXT: addl $12, %esp +; X86-NEXT: movl %ebp, 12(%eax) +; X86-NEXT: movl %edi, 8(%eax) +; X86-NEXT: movl %ebx, 4(%eax) +; X86-NEXT: movl %esi, (%eax) +; X86-NEXT: addl $8, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -36,7 +36,7 @@ ; SSE2-NEXT: psrlq %xmm4, %xmm1 ; SSE2-NEXT: shufpd {{.*#+}} xmm5 = xmm5[0],xmm1[1] ; SSE2-NEXT: pandn %xmm3, %xmm2 -; SSE2-NEXT: psllq $1, %xmm0 +; SSE2-NEXT: paddq %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psllq %xmm2, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] @@ -56,12 +56,12 @@ ; SSE41-NEXT: psrlq %xmm4, %xmm1 ; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm1[4,5,6,7] ; SSE41-NEXT: pandn %xmm3, %xmm2 -; SSE41-NEXT: psllq $1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psllq %xmm2, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3] +; SSE41-NEXT: paddq %xmm0, %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: psllq %xmm1, %xmm3 ; SSE41-NEXT: psllq %xmm2, %xmm0 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7] ; SSE41-NEXT: por %xmm5, %xmm0 ; SSE41-NEXT: retq ; @@ -74,11 +74,11 @@ ; AVX1-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm5[0,1,2,3],xmm1[4,5,6,7] ; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 -; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm3 -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] +; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] +; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vpsllq %xmm3, %xmm0, %xmm3 ; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7] ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; @@ -88,7 +88,7 @@ ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq @@ -99,7 +99,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512F-NEXT: 
vpaddq %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -110,7 +110,7 @@ ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -121,7 +121,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: retq @@ -142,7 +142,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq @@ -156,13 +156,13 @@ ; XOPAVX1-LABEL: var_funnnel_v2i64: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [63,63] -; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; XOPAVX1-NEXT: vpsllq $1, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpshlq %xmm4, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; XOPAVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2 -; XOPAVX1-NEXT: vpshlq %xmm2, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm5, %xmm4 +; XOPAVX1-NEXT: vpshlq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpshlq %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; @@ -172,7 +172,7 @@ ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlvq %xmm4, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpsllq $1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpsllvq %xmm2, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: retq @@ -188,7 +188,7 @@ ; X86-SSE2-NEXT: psrlq %xmm5, %xmm1 ; X86-SSE2-NEXT: shufpd {{.*#+}} xmm3 = xmm3[0],xmm1[1] ; X86-SSE2-NEXT: pandn %xmm4, %xmm2 -; X86-SSE2-NEXT: psllq $1, %xmm0 +; X86-SSE2-NEXT: paddq %xmm0, %xmm0 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 ; X86-SSE2-NEXT: psllq %xmm2, %xmm1 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] @@ -225,7 +225,7 @@ ; SSE2-NEXT: pslld $23, %xmm2 ; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 ; SSE2-NEXT: cvttps2dq %xmm2, %xmm1 -; SSE2-NEXT: pslld $1, %xmm0 +; SSE2-NEXT: paddd %xmm0, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE2-NEXT: pmuludq %xmm1, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -260,7 +260,7 @@ ; SSE41-NEXT: pslld $23, %xmm2 ; SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 ; SSE41-NEXT: cvttps2dq %xmm2, %xmm1 -; SSE41-NEXT: pslld $1, %xmm0 +; SSE41-NEXT: paddd %xmm0, %xmm0 ; SSE41-NEXT: pmulld %xmm1, %xmm0 ; SSE41-NEXT: por %xmm6, %xmm0 ; SSE41-NEXT: retq @@ -285,7 +285,7 @@ ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2 ; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 -; AVX1-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; 
AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -296,7 +296,7 @@ ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: retq @@ -307,7 +307,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX512F-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -318,7 +318,7 @@ ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -329,7 +329,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: retq @@ -350,7 +350,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq @@ -364,13 +364,13 @@ ; XOPAVX1-LABEL: var_funnnel_v4i32: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [31,31,31,31] -; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; XOPAVX1-NEXT: vpslld $1, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2 -; XOPAVX1-NEXT: vpshld %xmm2, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4 +; XOPAVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; XOPAVX1-NEXT: vpsubd %xmm4, %xmm5, %xmm4 +; XOPAVX1-NEXT: vpshld %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpshld %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: retq ; @@ -380,7 +380,7 @@ ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlvd %xmm4, %xmm1, %xmm1 ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpslld $1, %xmm0, %xmm0 +; XOPAVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpsllvd %xmm2, %xmm0, %xmm0 ; XOPAVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX2-NEXT: retq @@ -409,7 +409,7 @@ ; X86-SSE2-NEXT: pslld $23, %xmm2 ; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 ; X86-SSE2-NEXT: cvttps2dq %xmm2, %xmm1 -; X86-SSE2-NEXT: pslld $1, %xmm0 +; X86-SSE2-NEXT: paddd %xmm0, %xmm0 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -473,7 +473,7 @@ ; SSE2-NEXT: pslld $16, %xmm2 ; SSE2-NEXT: psrad $16, %xmm2 ; SSE2-NEXT: packssdw %xmm4, %xmm2 -; SSE2-NEXT: psllw $1, %xmm0 +; SSE2-NEXT: paddw %xmm0, %xmm0 ; SSE2-NEXT: pmullw %xmm2, %xmm0 ; SSE2-NEXT: por 
%xmm1, %xmm0 ; SSE2-NEXT: por %xmm3, %xmm0 @@ -519,7 +519,7 @@ ; SSE41-NEXT: paddd %xmm4, %xmm0 ; SSE41-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE41-NEXT: packusdw %xmm2, %xmm0 -; SSE41-NEXT: psllw $1, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: pmullw %xmm0, %xmm3 ; SSE41-NEXT: por %xmm1, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 @@ -554,7 +554,7 @@ ; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -608,7 +608,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper @@ -630,7 +630,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlvw %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllvw %xmm2, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq @@ -644,13 +644,13 @@ ; XOP-LABEL: var_funnnel_v8i16: ; XOP: # %bb.0: ; XOP-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15] -; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm4 -; XOP-NEXT: vpsllw $1, %xmm0, %xmm0 -; XOP-NEXT: vpshlw %xmm4, %xmm0, %xmm0 -; XOP-NEXT: vpand %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; XOP-NEXT: vpsubw %xmm2, %xmm3, %xmm2 -; XOP-NEXT: vpshlw %xmm2, %xmm1, %xmm1 +; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4 +; XOP-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; XOP-NEXT: vpsubw %xmm4, %xmm5, %xmm4 +; XOP-NEXT: vpshlw %xmm4, %xmm1, %xmm1 +; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2 +; XOP-NEXT: vpaddw %xmm0, %xmm0, %xmm0 +; XOP-NEXT: vpshlw %xmm2, %xmm0, %xmm0 ; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq ; @@ -703,7 +703,7 @@ ; X86-SSE2-NEXT: pslld $16, %xmm2 ; X86-SSE2-NEXT: psrad $16, %xmm2 ; X86-SSE2-NEXT: packssdw %xmm4, %xmm2 -; X86-SSE2-NEXT: psllw $1, %xmm0 +; X86-SSE2-NEXT: paddw %xmm0, %xmm0 ; X86-SSE2-NEXT: pmullw %xmm2, %xmm0 ; X86-SSE2-NEXT: por %xmm1, %xmm0 ; X86-SSE2-NEXT: por %xmm3, %xmm0 @@ -1036,7 +1036,7 @@ ; SSE-NEXT: pand %xmm3, %xmm4 ; SSE-NEXT: psrlq %xmm4, %xmm1 ; SSE-NEXT: pandn %xmm3, %xmm2 -; SSE-NEXT: psllq $1, %xmm0 +; SSE-NEXT: paddq %xmm0, %xmm0 ; SSE-NEXT: psllq %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: retq @@ -1047,7 +1047,7 @@ ; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -1058,7 +1058,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512F-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -1069,7 +1069,7 @@ ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; 
AVX512VL-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -1080,7 +1080,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: retq @@ -1101,7 +1101,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq @@ -1119,7 +1119,7 @@ ; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOP-NEXT: vpsrlq %xmm4, %xmm1, %xmm1 ; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpsllq $1, %xmm0, %xmm0 +; XOP-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; XOP-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq @@ -1131,7 +1131,7 @@ ; X86-SSE2-NEXT: pand %xmm3, %xmm4 ; X86-SSE2-NEXT: psrlq %xmm4, %xmm1 ; X86-SSE2-NEXT: pandn %xmm3, %xmm2 -; X86-SSE2-NEXT: psllq $1, %xmm0 +; X86-SSE2-NEXT: paddq %xmm0, %xmm0 ; X86-SSE2-NEXT: psllq %xmm2, %xmm0 ; X86-SSE2-NEXT: por %xmm1, %xmm0 ; X86-SSE2-NEXT: retl @@ -1256,7 +1256,7 @@ ; SSE-NEXT: pand %xmm3, %xmm4 ; SSE-NEXT: psrlw %xmm4, %xmm1 ; SSE-NEXT: pandn %xmm3, %xmm2 -; SSE-NEXT: psllw $1, %xmm0 +; SSE-NEXT: paddw %xmm0, %xmm0 ; SSE-NEXT: psllw %xmm2, %xmm0 ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: retq @@ -1267,7 +1267,7 @@ ; AVX-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -1278,7 +1278,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512F-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -1289,7 +1289,7 @@ ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -1300,7 +1300,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: retq @@ -1321,7 +1321,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq @@ -1339,7 +1339,7 @@ ; XOP-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOP-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 ; XOP-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOP-NEXT: vpsllw $1, %xmm0, %xmm0 
+; XOP-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; XOP-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq @@ -1351,7 +1351,7 @@ ; X86-SSE2-NEXT: pand %xmm3, %xmm4 ; X86-SSE2-NEXT: psrlw %xmm4, %xmm1 ; X86-SSE2-NEXT: pandn %xmm3, %xmm2 -; X86-SSE2-NEXT: psllw $1, %xmm0 +; X86-SSE2-NEXT: paddw %xmm0, %xmm0 ; X86-SSE2-NEXT: psllw %xmm2, %xmm0 ; X86-SSE2-NEXT: por %xmm1, %xmm0 ; X86-SSE2-NEXT: retl @@ -1761,7 +1761,7 @@ ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,65535,65535,65535,65535,65535] ; SSE2-NEXT: pandn %xmm1, %xmm2 ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-NEXT: psllw $1, %xmm0 +; SSE2-NEXT: paddw %xmm0, %xmm0 ; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: por %xmm1, %xmm0 @@ -1772,7 +1772,7 @@ ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = ; SSE41-NEXT: pmulhuw %xmm1, %xmm2 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7] -; SSE41-NEXT: psllw $1, %xmm0 +; SSE41-NEXT: paddw %xmm0, %xmm0 ; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE41-NEXT: por %xmm2, %xmm0 ; SSE41-NEXT: retq @@ -1781,7 +1781,7 @@ ; AVX: # %bb.0: ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 ; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] -; AVX-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -1790,7 +1790,7 @@ ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 ; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] -; AVX512F-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512F-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -1799,7 +1799,7 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] -; AVX512VL-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -1810,7 +1810,7 @@ ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7] ; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8] -; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper @@ -1829,7 +1829,7 @@ ; AVX512VLBW-LABEL: constant_funnnel_v8i16: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq @@ -1843,7 +1843,7 @@ ; XOP-LABEL: constant_funnnel_v8i16: ; XOP: # %bb.0: ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; XOP-NEXT: vpsllw $1, %xmm0, %xmm0 +; XOP-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOP-NEXT: retq @@ -1853,7 +1853,7 @@ ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,65535,65535,65535,65535,65535] ; X86-SSE2-NEXT: pandn 
%xmm1, %xmm2 ; X86-SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: psllw $1, %xmm0 +; X86-SSE2-NEXT: paddw %xmm0, %xmm0 ; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE2-NEXT: por %xmm2, %xmm0 ; X86-SSE2-NEXT: por %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -37,17 +37,17 @@ ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 ; AVX1-NEXT: vandnps %ymm3, %ymm2, %ymm2 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vpsllq $1, %xmm4, %xmm4 -; AVX1-NEXT: vpsllq %xmm3, %xmm4, %xmm5 -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] -; AVX1-NEXT: vpsllq %xmm3, %xmm4, %xmm3 -; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0,1,2,3],xmm3[4,5,6,7] -; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 -; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm4 -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] +; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX1-NEXT: vpaddq %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: vpsllq %xmm4, %xmm5, %xmm4 +; AVX1-NEXT: vpsllq %xmm3, %xmm5, %xmm3 +; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7] +; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm2[2,3,2,3] +; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vpsllq %xmm4, %xmm0, %xmm4 ; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7] ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -58,7 +58,7 @@ ; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 ; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -69,7 +69,7 @@ ; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512F-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -80,7 +80,7 @@ ; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512VL-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq @@ -91,7 +91,7 @@ ; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512BW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512BW-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -111,7 +111,7 @@ ; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512VLBW-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512VLBW-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -125,23 +125,23 @@ ; XOPAVX1-LABEL: var_funnnel_v4i64: ; XOPAVX1: # %bb.0: ; XOPAVX1-NEXT: vmovaps {{.*#+}} ymm3 = 
[63,63,63,63] -; XOPAVX1-NEXT: vandnps %ymm3, %ymm2, %ymm4 +; XOPAVX1-NEXT: vandps %ymm3, %ymm2, %ymm4 ; XOPAVX1-NEXT: vextractf128 $1, %ymm4, %xmm5 -; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 -; XOPAVX1-NEXT: vpsllq $1, %xmm6, %xmm6 -; XOPAVX1-NEXT: vpshlq %xmm5, %xmm6, %xmm5 -; XOPAVX1-NEXT: vpsllq $1, %xmm0, %xmm0 -; XOPAVX1-NEXT: vpshlq %xmm4, %xmm0, %xmm0 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 -; XOPAVX1-NEXT: vandps %ymm3, %ymm2, %ymm2 +; XOPAVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; XOPAVX1-NEXT: vpsubq %xmm5, %xmm6, %xmm5 +; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm7 +; XOPAVX1-NEXT: vpshlq %xmm5, %xmm7, %xmm5 +; XOPAVX1-NEXT: vpsubq %xmm4, %xmm6, %xmm4 +; XOPAVX1-NEXT: vpshlq %xmm4, %xmm1, %xmm1 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 +; XOPAVX1-NEXT: vandnps %ymm3, %ymm2, %ymm2 ; XOPAVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 -; XOPAVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; XOPAVX1-NEXT: vpsubq %xmm3, %xmm4, %xmm3 -; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 -; XOPAVX1-NEXT: vpshlq %xmm3, %xmm5, %xmm3 -; XOPAVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm2 -; XOPAVX1-NEXT: vpshlq %xmm2, %xmm1, %xmm1 -; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; XOPAVX1-NEXT: vpaddq %xmm4, %xmm4, %xmm4 +; XOPAVX1-NEXT: vpshlq %xmm3, %xmm4, %xmm3 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpshlq %xmm2, %xmm0, %xmm0 +; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; XOPAVX1-NEXT: retq ; @@ -151,7 +151,7 @@ ; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; XOPAVX2-NEXT: vpsrlvq %ymm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; XOPAVX2-NEXT: vpsllq $1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq @@ -184,7 +184,7 @@ ; AVX1-NEXT: vpaddd %xmm6, %xmm4, %xmm4 ; AVX1-NEXT: vcvttps2dq %xmm4, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; AVX1-NEXT: vpslld $1, %xmm7, %xmm7 +; AVX1-NEXT: vpaddd %xmm7, %xmm7, %xmm7 ; AVX1-NEXT: vpmulld %xmm4, %xmm7, %xmm4 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm4 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero @@ -202,7 +202,7 @@ ; AVX1-NEXT: vpslld $23, %xmm2, %xmm2 ; AVX1-NEXT: vpaddd %xmm6, %xmm2, %xmm2 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 -; AVX1-NEXT: vpslld $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 @@ -214,7 +214,7 @@ ; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 ; AVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpslld $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -225,7 +225,7 @@ ; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512F-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpslld $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -236,7 +236,7 @@ ; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512VL-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vpslld $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllvd %ymm2, 
%ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq @@ -247,7 +247,7 @@ ; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512BW-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512BW-NEXT: vpslld $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -267,7 +267,7 @@ ; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512VLBW-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512VLBW-NEXT: vpslld $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -289,13 +289,13 @@ ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [31,31,31,31] ; XOPAVX1-NEXT: vpxor %xmm6, %xmm3, %xmm3 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; XOPAVX1-NEXT: vpslld $1, %xmm7, %xmm7 +; XOPAVX1-NEXT: vpaddd %xmm7, %xmm7, %xmm7 ; XOPAVX1-NEXT: vpshld %xmm3, %xmm7, %xmm3 ; XOPAVX1-NEXT: vpor %xmm5, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpsubd %xmm2, %xmm4, %xmm4 ; XOPAVX1-NEXT: vpshld %xmm4, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpxor %xmm6, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpslld $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpshld %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 @@ -307,7 +307,7 @@ ; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; XOPAVX2-NEXT: vpsrlvd %ymm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; XOPAVX2-NEXT: vpslld $1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpaddd %ymm0, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpsllvd %ymm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq @@ -348,7 +348,7 @@ ; AVX1-NEXT: vcvttps2dq %xmm6, %xmm6 ; AVX1-NEXT: vpackusdw %xmm7, %xmm6, %xmm6 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; AVX1-NEXT: vpsllw $1, %xmm7, %xmm7 +; AVX1-NEXT: vpaddw %xmm7, %xmm7, %xmm7 ; AVX1-NEXT: vpmullw %xmm6, %xmm7, %xmm6 ; AVX1-NEXT: vpor %xmm4, %xmm6, %xmm4 ; AVX1-NEXT: vpsllw $12, %xmm2, %xmm6 @@ -375,7 +375,7 @@ ; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vcvttps2dq %xmm2, %xmm2 ; AVX1-NEXT: vpackusdw %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 @@ -427,7 +427,7 @@ ; AVX512BW-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -447,7 +447,7 @@ ; AVX512VLBW-NEXT: vpand %ymm3, %ymm2, %ymm4 ; AVX512VLBW-NEXT: vpsrlvw %ymm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllvw %ymm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -469,13 +469,13 @@ ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [15,15,15,15,15,15,15,15] ; XOPAVX1-NEXT: vpxor %xmm6, %xmm3, %xmm3 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; XOPAVX1-NEXT: vpsllw $1, %xmm7, %xmm7 +; XOPAVX1-NEXT: vpaddw %xmm7, %xmm7, %xmm7 ; XOPAVX1-NEXT: vpshlw %xmm3, %xmm7, %xmm3 ; XOPAVX1-NEXT: vpor %xmm5, %xmm3, %xmm3 
; XOPAVX1-NEXT: vpsubw %xmm2, %xmm4, %xmm4 ; XOPAVX1-NEXT: vpshlw %xmm4, %xmm1, %xmm1 ; XOPAVX1-NEXT: vpxor %xmm6, %xmm2, %xmm2 -; XOPAVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpshlw %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 @@ -484,22 +484,22 @@ ; XOPAVX2-LABEL: var_funnnel_v16i16: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm4 +; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm4 ; XOPAVX2-NEXT: vextracti128 $1, %ymm4, %xmm5 -; XOPAVX2-NEXT: vpsllw $1, %ymm0, %ymm0 -; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm6 -; XOPAVX2-NEXT: vpshlw %xmm5, %xmm6, %xmm5 -; XOPAVX2-NEXT: vpshlw %xmm4, %xmm0, %xmm0 -; XOPAVX2-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 -; XOPAVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 +; XOPAVX2-NEXT: vpxor %xmm6, %xmm6, %xmm6 +; XOPAVX2-NEXT: vpsubw %xmm5, %xmm6, %xmm5 +; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm7 +; XOPAVX2-NEXT: vpshlw %xmm5, %xmm7, %xmm5 +; XOPAVX2-NEXT: vpsubw %xmm4, %xmm6, %xmm4 +; XOPAVX2-NEXT: vpshlw %xmm4, %xmm1, %xmm1 +; XOPAVX2-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1 +; XOPAVX2-NEXT: vpandn %ymm3, %ymm2, %ymm2 ; XOPAVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 -; XOPAVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; XOPAVX2-NEXT: vpsubw %xmm3, %xmm4, %xmm3 -; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm5 -; XOPAVX2-NEXT: vpshlw %xmm3, %xmm5, %xmm3 -; XOPAVX2-NEXT: vpsubw %xmm2, %xmm4, %xmm2 -; XOPAVX2-NEXT: vpshlw %xmm2, %xmm1, %xmm1 -; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; XOPAVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 +; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4 +; XOPAVX2-NEXT: vpshlw %xmm3, %xmm4, %xmm3 +; XOPAVX2-NEXT: vpshlw %xmm2, %xmm0, %xmm0 +; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq %res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %amt) @@ -782,9 +782,9 @@ ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 ; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpsllq $1, %xmm3, %xmm3 +; AVX1-NEXT: vpaddq %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm3 -; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 @@ -796,7 +796,7 @@ ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -807,7 +807,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllq %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -818,7 +818,7 @@ ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllq %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq @@ -829,7 +829,7 @@ ; AVX512BW-NEXT: vpand 
%xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllq %xmm2, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -849,7 +849,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllq $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllq %xmm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -871,9 +871,9 @@ ; XOPAVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1 ; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; XOPAVX1-NEXT: vpsllq $1, %xmm3, %xmm3 +; XOPAVX1-NEXT: vpaddq %xmm3, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm3 -; XOPAVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpsllq %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 @@ -885,7 +885,7 @@ ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlq %xmm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpsllq $1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpaddq %ymm0, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpsllq %xmm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq @@ -1020,11 +1020,11 @@ ; AVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 ; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpsllw $1, %xmm3, %xmm3 +; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3 ; AVX1-NEXT: vpor %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 @@ -1036,7 +1036,7 @@ ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -1047,7 +1047,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -1058,7 +1058,7 @@ ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq @@ -1069,7 +1069,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -1089,7 +1089,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpandn 
%xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -1109,11 +1109,11 @@ ; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 ; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; XOPAVX1-NEXT: vpsllw $1, %xmm3, %xmm3 +; XOPAVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpor %xmm5, %xmm3, %xmm3 ; XOPAVX1-NEXT: vpsrlw %xmm4, %xmm1, %xmm1 -; XOPAVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 @@ -1125,7 +1125,7 @@ ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm4 ; XOPAVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 ; XOPAVX2-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; XOPAVX2-NEXT: vpsllw $1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq @@ -1494,10 +1494,10 @@ ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 ; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 -; AVX1-NEXT: vpsllw $1, %xmm0, %xmm2 +; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm2 ; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 @@ -1508,7 +1508,7 @@ ; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] -; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -1518,7 +1518,7 @@ ; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 ; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -1528,7 +1528,7 @@ ; AVX512VL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq @@ -1539,7 +1539,7 @@ ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] -; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -1556,7 +1556,7 @@ ; AVX512VLBW-LABEL: 
constant_funnnel_v16i16: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -1573,10 +1573,10 @@ ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 ; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 -; XOPAVX1-NEXT: vpsllw $1, %xmm0, %xmm2 +; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm2 ; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; XOPAVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; XOPAVX1-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 ; XOPAVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 @@ -1587,7 +1587,7 @@ ; XOPAVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 ; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] -; XOPAVX2-NEXT: vpsllw $1, %ymm0, %ymm0 +; XOPAVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -22,7 +22,7 @@ ; AVX512F-NEXT: vpandq %zmm3, %zmm2, %zmm4 ; AVX512F-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1 ; AVX512F-NEXT: vpandnq %zmm3, %zmm2, %zmm2 -; AVX512F-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpsllvq %zmm2, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq @@ -33,7 +33,7 @@ ; AVX512VL-NEXT: vpandq %zmm3, %zmm2, %zmm4 ; AVX512VL-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1 ; AVX512VL-NEXT: vpandnq %zmm3, %zmm2, %zmm2 -; AVX512VL-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512VL-NEXT: vpsllvq %zmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq @@ -44,7 +44,7 @@ ; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4 ; AVX512BW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm2 -; AVX512BW-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -61,7 +61,7 @@ ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4 ; AVX512VLBW-NEXT: vpsrlvq %zmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm2 -; AVX512VLBW-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllvq %zmm2, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq @@ -82,7 +82,7 @@ ; AVX512F-NEXT: vpandd %zmm3, %zmm2, %zmm4 ; AVX512F-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1 ; AVX512F-NEXT: vpandnd %zmm3, %zmm2, %zmm2 -; AVX512F-NEXT: vpslld $1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpsllvd %zmm2, %zmm0, %zmm0 ; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq @@ -93,7 +93,7 @@ ; AVX512VL-NEXT: vpandd %zmm3, %zmm2, %zmm4 ; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1 ; AVX512VL-NEXT: vpandnd %zmm3, %zmm2, %zmm2 -; AVX512VL-NEXT: vpslld $1, 
%zmm0, %zmm0 +; AVX512VL-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; AVX512VL-NEXT: vpsllvd %zmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq @@ -104,7 +104,7 @@ ; AVX512BW-NEXT: vpandd %zmm3, %zmm2, %zmm4 ; AVX512BW-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandnd %zmm3, %zmm2, %zmm2 -; AVX512BW-NEXT: vpslld $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllvd %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -121,7 +121,7 @@ ; AVX512VLBW-NEXT: vpandd %zmm3, %zmm2, %zmm4 ; AVX512VLBW-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpandnd %zmm3, %zmm2, %zmm2 -; AVX512VLBW-NEXT: vpslld $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddd %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllvd %zmm2, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq @@ -188,7 +188,7 @@ ; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm4 ; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandnq %zmm3, %zmm2, %zmm2 -; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -205,7 +205,7 @@ ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm2, %zmm4 ; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpandnq %zmm3, %zmm2, %zmm2 -; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllvw %zmm2, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq @@ -428,7 +428,7 @@ ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512F-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512F-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512F-NEXT: vpsllq %xmm2, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq @@ -439,7 +439,7 @@ ; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VL-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512VL-NEXT: vpsllq %xmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq @@ -450,7 +450,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllq %xmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -468,7 +468,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlq %xmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllq $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddq %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllq %xmm2, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq @@ -554,9 +554,9 @@ ; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1 ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512F-NEXT: vpsllw $1, %ymm3, %ymm3 +; AVX512F-NEXT: vpaddw %ymm3, %ymm3, %ymm3 ; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3 -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 @@ -572,9 +572,9 @@ ; AVX512VL-NEXT: 
vinserti64x4 $1, %ymm5, %zmm1, %zmm1 ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512VL-NEXT: vpsllw $1, %ymm3, %ymm3 +; AVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3 -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 @@ -586,7 +586,7 @@ ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -604,7 +604,7 @@ ; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm4 ; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq @@ -853,7 +853,7 @@ ; AVX512BW-LABEL: constant_funnnel_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 -; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -867,7 +867,7 @@ ; AVX512VLBW-LABEL: constant_funnnel_v32i16: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 -; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -963,7 +963,7 @@ ; SSE41-NEXT: movdqa %xmm0, %xmm4 ; SSE41-NEXT: psrlw %xmm3, %xmm4 ; SSE41-NEXT: pandn %xmm2, %xmm1 -; SSE41-NEXT: psllw $1, %xmm0 +; SSE41-NEXT: paddw %xmm0, %xmm0 ; SSE41-NEXT: psllw %xmm1, %xmm0 ; SSE41-NEXT: por %xmm4, %xmm0 ; SSE41-NEXT: retq @@ -974,7 +974,7 @@ ; AVX-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 ; AVX-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -985,7 +985,7 @@ ; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512F-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 ; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512F-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -996,7 +996,7 @@ ; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512VL-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 ; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -1007,7 +1007,7 @@ ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512BW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 ; 
AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512BW-NEXT: retq @@ -1018,7 +1018,7 @@ ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 ; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -789,11 +789,11 @@ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 ; AVX1-NEXT: vpsrlw %xmm3, %xmm4, %xmm5 ; AVX1-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpsllw $1, %xmm4, %xmm2 +; AVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm2 ; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 ; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2 ; AVX1-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 -; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -805,7 +805,7 @@ ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX2-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX2-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -816,7 +816,7 @@ ; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -827,7 +827,7 @@ ; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: retq @@ -838,7 +838,7 @@ ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512BW-NEXT: retq @@ -849,7 +849,7 @@ ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -301,9 +301,9 @@ ; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 ; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpsllw $1, %ymm4, %ymm2 +; AVX512F-NEXT: vpaddw %ymm4, %ymm4, %ymm2 ; AVX512F-NEXT: vpsllw %xmm1, %ymm2, 
%ymm2 -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0 @@ -318,9 +318,9 @@ ; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 ; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpsllw $1, %ymm4, %ymm2 +; AVX512VL-NEXT: vpaddw %ymm4, %ymm4, %ymm2 ; AVX512VL-NEXT: vpsllw %xmm1, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0 @@ -332,7 +332,7 @@ ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 ; AVX512BW-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -343,7 +343,7 @@ ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 ; AVX512VLBW-NEXT: vpandn %xmm2, %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpsllw $1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm0, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll --- a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll +++ b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll @@ -18,7 +18,7 @@ ; CHECK-NEXT: pmullw %xmm1, %xmm2 ; CHECK-NEXT: psrlw $15, %xmm2 ; CHECK-NEXT: pmulhw %xmm1, %xmm0 -; CHECK-NEXT: psllw $1, %xmm0 +; CHECK-NEXT: paddw %xmm0, %xmm0 ; CHECK-NEXT: por %xmm2, %xmm0 ; CHECK-NEXT: retq %t = call <4 x i16> @llvm.smul.fix.v4i16(<4 x i16> , <4 x i16> %a, i32 15) @@ -33,7 +33,7 @@ ; CHECK-NEXT: pmullw %xmm1, %xmm2 ; CHECK-NEXT: psrlw $15, %xmm2 ; CHECK-NEXT: pmulhuw %xmm1, %xmm0 -; CHECK-NEXT: psllw $1, %xmm0 +; CHECK-NEXT: paddw %xmm0, %xmm0 ; CHECK-NEXT: por %xmm2, %xmm0 ; CHECK-NEXT: retq %t = call <4 x i16> @llvm.umul.fix.v4i16(<4 x i16> , <4 x i16> %a, i32 15) diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll --- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll @@ -927,23 +927,23 @@ ; SSE2-LABEL: constant_shift_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psllq $1, %xmm1 -; SSE2-NEXT: psllq $7, %xmm0 -; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE2-NEXT: psllq $7, %xmm1 +; SSE2-NEXT: paddq %xmm0, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE41-LABEL: constant_shift_v2i64: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: psllq $7, %xmm1 -; SSE41-NEXT: psllq $1, %xmm0 +; SSE41-NEXT: paddq %xmm0, %xmm0 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: constant_shift_v2i64: ; AVX1: # %bb.0: ; AVX1-NEXT: vpsllq $7, %xmm0, %xmm1 -; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] ; AVX1-NEXT: retq ; @@ -975,9 +975,9 @@ ; X86-SSE-LABEL: constant_shift_v2i64: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE-NEXT: psllq $1, %xmm1 -; 
X86-SSE-NEXT: psllq $7, %xmm0 -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; X86-SSE-NEXT: psllq $7, %xmm1 +; X86-SSE-NEXT: paddq %xmm0, %xmm0 +; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; X86-SSE-NEXT: retl %shift = shl <2 x i64> %a, ret <2 x i64> %shift diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll --- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll @@ -1061,7 +1061,7 @@ ; AVX1-NEXT: vpsllq $31, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] ; AVX1-NEXT: vpsllq $7, %xmm0, %xmm2 -; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -1101,7 +1101,7 @@ ; X86-AVX1-NEXT: vpsllq $31, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] ; X86-AVX1-NEXT: vpsllq $7, %xmm0, %xmm2 -; X86-AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X86-AVX1-NEXT: retl