diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39609,6 +39609,7 @@
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
   EVT SrcVT = N0.getValueType();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

   // Try to match patterns such as
   // (i16 bitcast (v16i1 x))
@@ -39666,8 +39667,7 @@
   // If we're bitcasting from iX to vXi1, see if the integer originally
   // began as a vXi1 and whether we can remove the bitcast entirely.
   if (VT.isVector() && VT.getScalarType() == MVT::i1 &&
-      SrcVT.isScalarInteger() &&
-      DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+      SrcVT.isScalarInteger() && TLI.isTypeLegal(VT)) {
     if (SDValue V =
             combineBitcastToBoolVector(VT, N0, SDLoc(N), DAG, Subtarget))
       return V;
@@ -39855,8 +39855,11 @@
   default: return SDValue();
   }

+  // Check if we have a bitcast from another integer type as well.
   if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
-        (Subtarget.hasSSE2() && VT == MVT::f64)))
+        (Subtarget.hasSSE2() && VT == MVT::f64) ||
+        (Subtarget.hasSSE2() && VT.isInteger() && VT.isVector() &&
+         TLI.isTypeLegal(VT))))
     return SDValue();

   SDValue LogicOp0 = N0.getOperand(0);
@@ -39868,14 +39871,16 @@
       LogicOp0.hasOneUse() && LogicOp0.getOperand(0).getValueType() == VT &&
       !isa<ConstantSDNode>(LogicOp0.getOperand(0))) {
     SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1);
-    return DAG.getNode(FPOpcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1);
+    unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode();
+    return DAG.getNode(Opcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1);
   }

   // bitcast(logic(X, bitcast(Y))) --> logic'(bitcast(X), Y)
   if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST &&
       LogicOp1.hasOneUse() && LogicOp1.getOperand(0).getValueType() == VT &&
       !isa<ConstantSDNode>(LogicOp1.getOperand(0))) {
     SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0);
-    return DAG.getNode(FPOpcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0);
+    unsigned Opcode = VT.isFloatingPoint() ?
FPOpcode : N0.getOpcode(); + return DAG.getNode(Opcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0); } return SDValue(); diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll --- a/llvm/test/CodeGen/X86/avx512-logic.ll +++ b/llvm/test/CodeGen/X86/avx512-logic.ll @@ -911,7 +911,7 @@ ; KNL: ## %bb.0: ; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; KNL-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 -; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; KNL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 ; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: retq ; @@ -933,7 +933,7 @@ ; KNL: ## %bb.0: ; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; KNL-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 -; KNL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 +; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: retq ; @@ -955,7 +955,7 @@ ; KNL: ## %bb.0: ; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; KNL-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 -; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm2 +; KNL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm2 ; KNL-NEXT: vpord %zmm1, %zmm2, %zmm0 {%k1} ; KNL-NEXT: retq ; @@ -977,7 +977,7 @@ ; KNL: ## %bb.0: ; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; KNL-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 -; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; KNL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 ; KNL-NEXT: vpord %zmm1, %zmm0, %zmm1 {%k1} ; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 ; KNL-NEXT: retq @@ -1001,7 +1001,7 @@ ; KNL: ## %bb.0: ; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; KNL-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 -; KNL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm2 +; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm2 ; KNL-NEXT: vpxorq %zmm1, %zmm2, %zmm0 {%k1} ; KNL-NEXT: retq ; @@ -1023,7 +1023,7 @@ ; KNL: ## %bb.0: ; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; KNL-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 -; KNL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 +; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm1 {%k1} ; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 ; KNL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll --- a/llvm/test/CodeGen/X86/known-signbits-vector.ll +++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll @@ -341,7 +341,7 @@ ; X86-NEXT: vpsrad $29, %xmm0, %xmm0 ; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X86-NEXT: vpand %xmm0, %xmm1, %xmm0 ; X86-NEXT: vcvtdq2ps %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll @@ -1671,7 +1671,7 @@ ; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero ; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: sete %al ; X64-AVX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll --- a/llvm/test/CodeGen/X86/memcmp-optsize.ll +++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll @@ -688,7 +688,7 @@ ; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero ; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 ; X64-AVX-NEXT: vptest %xmm0, 
%xmm0 ; X64-AVX-NEXT: sete %al ; X64-AVX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll --- a/llvm/test/CodeGen/X86/memcmp-pgso.ll +++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll @@ -688,7 +688,7 @@ ; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero ; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: sete %al ; X64-AVX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -1650,7 +1650,7 @@ ; X64-AVX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero ; X64-AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; X64-AVX-NEXT: vpxor (%rsi), %xmm0, %xmm0 -; X64-AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 ; X64-AVX-NEXT: vptest %xmm0, %xmm0 ; X64-AVX-NEXT: sete %al ; X64-AVX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll --- a/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll +++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll @@ -217,7 +217,7 @@ ; X86-AVX512F: # %bb.0: ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0 -; X86-AVX512F-NEXT: vpandq {{\.LCPI.*}}, %zmm0, %zmm0 +; X86-AVX512F-NEXT: vpandd {{\.LCPI.*}}, %zmm0, %zmm0 ; X86-AVX512F-NEXT: retl %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1 %ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3 diff --git a/llvm/test/CodeGen/X86/pr40891.ll b/llvm/test/CodeGen/X86/pr40891.ll --- a/llvm/test/CodeGen/X86/pr40891.ll +++ b/llvm/test/CodeGen/X86/pr40891.ll @@ -8,9 +8,11 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vandps %ymm2, %ymm0, %ymm0 ; CHECK-NEXT: vandps {{\.LCPI.*}}, %ymm1, %ymm1 -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm2 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1 ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm2[0,2],ymm0[0,2],ymm2[4,6],ymm0[4,6] ; CHECK-NEXT: retl %a = shufflevector <4 x i64> %y, <4 x i64> , <8 x i32> %b = and <8 x i64> %x, %a diff --git a/llvm/test/CodeGen/X86/sse2.ll b/llvm/test/CodeGen/X86/sse2.ll --- a/llvm/test/CodeGen/X86/sse2.ll +++ b/llvm/test/CodeGen/X86/sse2.ll @@ -678,38 +678,16 @@ ; X86-SSE-NEXT: andps {{\.LCPI.*}}, %xmm0 ; X86-SSE-NEXT: retl ; -; X86-AVX-LABEL: PR19721: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X86-AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] -; X86-AVX-NEXT: retl +; AVX-LABEL: PR19721: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX-NEXT: ret{{[l|q]}} ; ; X64-SSE-LABEL: PR19721: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movq %xmm0, %rax -; X64-SSE-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; X64-SSE-NEXT: andq %rax, %rcx -; X64-SSE-NEXT: movq %rcx, %xmm1 -; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; X64-SSE-NEXT: andps {{.*}}(%rip), %xmm0 ; X64-SSE-NEXT: retq -; -; X64-AVX1-LABEL: PR19721: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovq %xmm0, %rax -; X64-AVX1-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; X64-AVX1-NEXT: andq %rax, %rcx -; 
X64-AVX1-NEXT: vmovq %rcx, %xmm1 -; X64-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] -; X64-AVX1-NEXT: retq -; -; X64-AVX512-LABEL: PR19721: -; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovq %xmm0, %rax -; X64-AVX512-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000 -; X64-AVX512-NEXT: andq %rax, %rcx -; X64-AVX512-NEXT: vmovq %rcx, %xmm1 -; X64-AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; X64-AVX512-NEXT: retq %bc = bitcast <4 x i32> %i to i128 %insert = and i128 %bc, -4294967296 %bc2 = bitcast i128 %insert to <4 x i32> diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll --- a/llvm/test/CodeGen/X86/vec_saddo.ll +++ b/llvm/test/CodeGen/X86/vec_saddo.ll @@ -813,10 +813,9 @@ ; SSE-NEXT: pxor %xmm0, %xmm2 ; SSE-NEXT: movdqa %xmm3, %xmm4 ; SSE-NEXT: pcmpgtd %xmm2, %xmm4 -; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] ; SSE-NEXT: pcmpeqd %xmm3, %xmm2 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] -; SSE-NEXT: pand %xmm5, %xmm2 +; SSE-NEXT: pand %xmm4, %xmm2 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] ; SSE-NEXT: por %xmm2, %xmm3 ; SSE-NEXT: pxor %xmm2, %xmm2 diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll --- a/llvm/test/CodeGen/X86/vec_ssubo.ll +++ b/llvm/test/CodeGen/X86/vec_ssubo.ll @@ -819,19 +819,17 @@ ; SSE-NEXT: pxor %xmm2, %xmm0 ; SSE-NEXT: movdqa %xmm3, %xmm4 ; SSE-NEXT: pcmpgtd %xmm0, %xmm4 -; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] ; SSE-NEXT: pcmpeqd %xmm3, %xmm0 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE-NEXT: pand %xmm5, %xmm0 +; SSE-NEXT: pand %xmm4, %xmm0 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] ; SSE-NEXT: por %xmm0, %xmm3 ; SSE-NEXT: pxor %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: pcmpgtd %xmm2, %xmm0 -; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2] ; SSE-NEXT: pcmpeqd %xmm2, %xmm1 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE-NEXT: pand %xmm4, %xmm1 +; SSE-NEXT: pand %xmm0, %xmm1 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: pxor %xmm3, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll --- a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll @@ -249,9 +249,9 @@ define void @test_urem_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind { ; X64-LABEL: test_urem_pow2_v2i32: ; X64: # %bb.0: -; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X64-NEXT: andps {{.*}}(%rip), %xmm0 -; X64-NEXT: movlps %xmm0, (%rsi) +; X64-NEXT: movabsq $30064771079, %rax # imm = 0x700000007 +; X64-NEXT: andq (%rdi), %rax +; X64-NEXT: movq %rax, (%rsi) ; X64-NEXT: retq ; ; X86-LABEL: test_urem_pow2_v2i32: diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll --- a/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll @@ -3,8 +3,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512 +; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BWVL ; ; vXi64 @@ -477,7 +477,7 @@ ; AVX-LABEL: test_v2i16: ; AVX: # %bb.0: ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: testw %ax, %ax ; AVX-NEXT: sete %al @@ -505,7 +505,7 @@ ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: testw %ax, %ax ; AVX-NEXT: setne %al @@ -537,7 +537,7 @@ ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: testw %ax, %ax ; AVX-NEXT: sete %al @@ -570,8 +570,9 @@ ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: testw %ax, %ax @@ -586,8 +587,9 @@ ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: testw %ax, %ax @@ -595,21 +597,38 @@ ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovd %xmm0, %eax -; AVX512-NEXT: testw %ax, %ax -; AVX512-NEXT: setne %al -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512BW-LABEL: test_v16i16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX512BW-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; AVX512BW-NEXT: vpand %xmm0, %xmm1, %xmm2 +; AVX512BW-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: testw %ax, %ax +; AVX512BW-NEXT: setne %al +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: test_v16i16: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BWVL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX512BWVL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512BWVL-NEXT: 
vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; AVX512BWVL-NEXT: vpand %xmm0, %xmm1, %xmm2 +; AVX512BWVL-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX512BWVL-NEXT: vpternlogq $128, %xmm1, %xmm0, %xmm2 +; AVX512BWVL-NEXT: vmovd %xmm2, %eax +; AVX512BWVL-NEXT: testw %ax, %ax +; AVX512BWVL-NEXT: setne %al +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq %1 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %a0) %2 = icmp ne i16 %1, 0 ret i1 %2 @@ -641,9 +660,10 @@ ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1] +; AVX1-NEXT: vandps %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: testw %ax, %ax ; AVX1-NEXT: sete %al @@ -658,8 +678,9 @@ ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: testw %ax, %ax diff --git a/llvm/test/CodeGen/X86/vector-reduce-and.ll b/llvm/test/CodeGen/X86/vector-reduce-and.ll --- a/llvm/test/CodeGen/X86/vector-reduce-and.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-and.ll @@ -407,7 +407,7 @@ ; AVX-LABEL: test_v2i16: ; AVX: # %bb.0: ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq @@ -432,7 +432,7 @@ ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq @@ -461,7 +461,7 @@ ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq @@ -557,9 +557,10 @@ ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1] +; AVX1-NEXT: vandps %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper @@ -573,8 +574,9 @@ ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax diff --git a/llvm/test/CodeGen/X86/vector-reduce-or.ll 
b/llvm/test/CodeGen/X86/vector-reduce-or.ll --- a/llvm/test/CodeGen/X86/vector-reduce-or.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or.ll @@ -407,7 +407,7 @@ ; AVX-LABEL: test_v2i16: ; AVX: # %bb.0: ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq @@ -432,7 +432,7 @@ ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq @@ -461,7 +461,7 @@ ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq @@ -557,9 +557,10 @@ ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1] +; AVX1-NEXT: vorps %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX1-NEXT: vorps %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper @@ -573,8 +574,9 @@ ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX2-NEXT: vpor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor.ll b/llvm/test/CodeGen/X86/vector-reduce-xor.ll --- a/llvm/test/CodeGen/X86/vector-reduce-xor.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-xor.ll @@ -407,7 +407,7 @@ ; AVX-LABEL: test_v2i16: ; AVX: # %bb.0: ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpxor %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq @@ -432,7 +432,7 @@ ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpxor %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq @@ -461,7 +461,7 @@ ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpxor %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq @@ -557,9 +557,10 @@ ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,1,1] +; AVX1-NEXT: vxorps %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 
; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper @@ -573,8 +574,9 @@ ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vpsrld $16, %xmm2, %xmm2 +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll --- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll @@ -37,10 +37,9 @@ ; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE2-NEXT: por %xmm0, %xmm1 ; SSE2-NEXT: pand %xmm3, %xmm1 @@ -68,10 +67,9 @@ ; SSSE3-NEXT: pxor %xmm1, %xmm0 ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 -; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] ; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: pand %xmm2, %xmm0 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSSE3-NEXT: por %xmm0, %xmm1 ; SSSE3-NEXT: pand %xmm3, %xmm1 @@ -186,10 +184,9 @@ ; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE2-NEXT: por %xmm0, %xmm1 ; SSE2-NEXT: pand %xmm3, %xmm1 @@ -218,10 +215,9 @@ ; SSSE3-NEXT: pxor %xmm1, %xmm0 ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 -; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] ; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: pand %xmm2, %xmm0 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSSE3-NEXT: por %xmm0, %xmm1 ; SSSE3-NEXT: pand %xmm3, %xmm1 @@ -354,10 +350,9 @@ ; SSE2-NEXT: pxor %xmm2, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2] ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm5, %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; SSE2-NEXT: por %xmm0, %xmm1 ; SSE2-NEXT: pand %xmm4, %xmm1 @@ -365,10 +360,9 @@ ; SSE2-NEXT: pxor %xmm2, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm4 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm5, %xmm2 +; SSE2-NEXT: pand %xmm4, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] ; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: pand %xmm3, %xmm0 @@ -410,10 +404,9 @@ ; SSSE3-NEXT: pxor %xmm2, %xmm0 ; SSSE3-NEXT: movdqa %xmm0, %xmm1 ; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1 -; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = 
xmm1[0,0,2,2] ; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm5, %xmm0 +; SSSE3-NEXT: pand %xmm1, %xmm0 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; SSSE3-NEXT: por %xmm0, %xmm1 ; SSSE3-NEXT: pand %xmm4, %xmm1 @@ -421,10 +414,9 @@ ; SSSE3-NEXT: pxor %xmm2, %xmm0 ; SSSE3-NEXT: movdqa %xmm0, %xmm4 ; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4 -; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] ; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm5, %xmm2 +; SSSE3-NEXT: pand %xmm4, %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] ; SSSE3-NEXT: por %xmm2, %xmm0 ; SSSE3-NEXT: pand %xmm3, %xmm0 @@ -584,9 +576,9 @@ ; SSE2-NEXT: movdqa 32(%rdi), %xmm6 ; SSE2-NEXT: movdqa 48(%rdi), %xmm9 ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295] -; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648] ; SSE2-NEXT: movdqa %xmm3, %xmm2 -; SSE2-NEXT: pxor %xmm11, %xmm2 +; SSE2-NEXT: pxor %xmm0, %xmm2 ; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483647,2147483647] ; SSE2-NEXT: movdqa %xmm10, %xmm5 ; SSE2-NEXT: pcmpgtd %xmm2, %xmm5 @@ -600,7 +592,7 @@ ; SSE2-NEXT: pandn %xmm8, %xmm2 ; SSE2-NEXT: por %xmm3, %xmm2 ; SSE2-NEXT: movdqa %xmm7, %xmm1 -; SSE2-NEXT: pxor %xmm11, %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm1 ; SSE2-NEXT: movdqa %xmm10, %xmm3 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] @@ -613,7 +605,7 @@ ; SSE2-NEXT: pandn %xmm8, %xmm3 ; SSE2-NEXT: por %xmm7, %xmm3 ; SSE2-NEXT: movdqa %xmm6, %xmm1 -; SSE2-NEXT: pxor %xmm11, %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm1 ; SSE2-NEXT: movdqa %xmm10, %xmm4 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] @@ -626,7 +618,7 @@ ; SSE2-NEXT: pandn %xmm8, %xmm7 ; SSE2-NEXT: por %xmm6, %xmm7 ; SSE2-NEXT: movdqa %xmm9, %xmm1 -; SSE2-NEXT: pxor %xmm11, %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm1 ; SSE2-NEXT: movdqa %xmm10, %xmm4 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] @@ -639,51 +631,47 @@ ; SSE2-NEXT: pandn %xmm8, %xmm4 ; SSE2-NEXT: por %xmm9, %xmm4 ; SSE2-NEXT: movdqa %xmm4, %xmm1 -; SSE2-NEXT: pxor %xmm11, %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm5 -; SSE2-NEXT: pcmpgtd %xmm11, %xmm5 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm11, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm5 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE2-NEXT: pand %xmm6, %xmm1 +; SSE2-NEXT: pand %xmm5, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] ; SSE2-NEXT: por %xmm1, %xmm5 ; SSE2-NEXT: pand %xmm4, %xmm5 ; SSE2-NEXT: movdqa %xmm7, %xmm1 -; SSE2-NEXT: pxor %xmm11, %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm4 -; SSE2-NEXT: pcmpgtd %xmm11, %xmm4 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm11, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] -; SSE2-NEXT: pand %xmm6, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,3,3] +; SSE2-NEXT: pand %xmm4, %xmm6 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3] -; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: por %xmm6, %xmm1 ; SSE2-NEXT: pand %xmm7, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm5[0,2] -; SSE2-NEXT: movdqa %xmm3, %xmm0 -; SSE2-NEXT: pxor %xmm11, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm4 -; SSE2-NEXT: 
pcmpgtd %xmm11, %xmm4 -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm11, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm5, %xmm0 +; SSE2-NEXT: movdqa %xmm3, %xmm4 +; SSE2-NEXT: pxor %xmm0, %xmm4 +; SSE2-NEXT: movdqa %xmm4, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm5 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSE2-NEXT: por %xmm0, %xmm4 -; SSE2-NEXT: pand %xmm3, %xmm4 -; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: pxor %xmm11, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: pcmpgtd %xmm11, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] -; SSE2-NEXT: pcmpeqd %xmm11, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] -; SSE2-NEXT: pand %xmm5, %xmm6 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] -; SSE2-NEXT: por %xmm6, %xmm0 +; SSE2-NEXT: pand %xmm5, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] +; SSE2-NEXT: por %xmm4, %xmm5 +; SSE2-NEXT: pand %xmm3, %xmm5 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pxor %xmm0, %xmm3 +; SSE2-NEXT: movdqa %xmm3, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] +; SSE2-NEXT: por %xmm3, %xmm0 ; SSE2-NEXT: pand %xmm2, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm4[0,2] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: trunc_packus_v8i64_v8i32: @@ -693,9 +681,9 @@ ; SSSE3-NEXT: movdqa 32(%rdi), %xmm6 ; SSSE3-NEXT: movdqa 48(%rdi), %xmm9 ; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295] -; SSSE3-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648] +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648] ; SSSE3-NEXT: movdqa %xmm3, %xmm2 -; SSSE3-NEXT: pxor %xmm11, %xmm2 +; SSSE3-NEXT: pxor %xmm0, %xmm2 ; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147483647,2147483647] ; SSSE3-NEXT: movdqa %xmm10, %xmm5 ; SSSE3-NEXT: pcmpgtd %xmm2, %xmm5 @@ -709,7 +697,7 @@ ; SSSE3-NEXT: pandn %xmm8, %xmm2 ; SSSE3-NEXT: por %xmm3, %xmm2 ; SSSE3-NEXT: movdqa %xmm7, %xmm1 -; SSSE3-NEXT: pxor %xmm11, %xmm1 +; SSSE3-NEXT: pxor %xmm0, %xmm1 ; SSSE3-NEXT: movdqa %xmm10, %xmm3 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 ; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] @@ -722,7 +710,7 @@ ; SSSE3-NEXT: pandn %xmm8, %xmm3 ; SSSE3-NEXT: por %xmm7, %xmm3 ; SSSE3-NEXT: movdqa %xmm6, %xmm1 -; SSSE3-NEXT: pxor %xmm11, %xmm1 +; SSSE3-NEXT: pxor %xmm0, %xmm1 ; SSSE3-NEXT: movdqa %xmm10, %xmm4 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] @@ -735,7 +723,7 @@ ; SSSE3-NEXT: pandn %xmm8, %xmm7 ; SSSE3-NEXT: por %xmm6, %xmm7 ; SSSE3-NEXT: movdqa %xmm9, %xmm1 -; SSSE3-NEXT: pxor %xmm11, %xmm1 +; SSSE3-NEXT: pxor %xmm0, %xmm1 ; SSSE3-NEXT: movdqa %xmm10, %xmm4 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] @@ -748,51 +736,47 @@ ; SSSE3-NEXT: pandn %xmm8, %xmm4 ; SSSE3-NEXT: por %xmm9, %xmm4 ; SSSE3-NEXT: movdqa %xmm4, %xmm1 -; SSSE3-NEXT: pxor %xmm11, %xmm1 +; SSSE3-NEXT: pxor %xmm0, %xmm1 ; SSSE3-NEXT: movdqa %xmm1, %xmm5 -; SSSE3-NEXT: pcmpgtd %xmm11, %xmm5 -; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm11, %xmm1 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm5 +; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSSE3-NEXT: pand %xmm6, %xmm1 +; SSSE3-NEXT: pand %xmm5, %xmm1 ; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = 
xmm5[1,1,3,3] ; SSSE3-NEXT: por %xmm1, %xmm5 ; SSSE3-NEXT: pand %xmm4, %xmm5 ; SSSE3-NEXT: movdqa %xmm7, %xmm1 -; SSSE3-NEXT: pxor %xmm11, %xmm1 +; SSSE3-NEXT: pxor %xmm0, %xmm1 ; SSSE3-NEXT: movdqa %xmm1, %xmm4 -; SSSE3-NEXT: pcmpgtd %xmm11, %xmm4 -; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm11, %xmm1 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] -; SSSE3-NEXT: pand %xmm6, %xmm0 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4 +; SSSE3-NEXT: pcmpeqd %xmm0, %xmm1 +; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,3,3] +; SSSE3-NEXT: pand %xmm4, %xmm6 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3] -; SSSE3-NEXT: por %xmm0, %xmm1 +; SSSE3-NEXT: por %xmm6, %xmm1 ; SSSE3-NEXT: pand %xmm7, %xmm1 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm5[0,2] -; SSSE3-NEXT: movdqa %xmm3, %xmm0 -; SSSE3-NEXT: pxor %xmm11, %xmm0 -; SSSE3-NEXT: movdqa %xmm0, %xmm4 -; SSSE3-NEXT: pcmpgtd %xmm11, %xmm4 -; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm11, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm5, %xmm0 +; SSSE3-NEXT: movdqa %xmm3, %xmm4 +; SSSE3-NEXT: pxor %xmm0, %xmm4 +; SSSE3-NEXT: movdqa %xmm4, %xmm5 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm5 +; SSSE3-NEXT: pcmpeqd %xmm0, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; SSSE3-NEXT: por %xmm0, %xmm4 -; SSSE3-NEXT: pand %xmm3, %xmm4 -; SSSE3-NEXT: movdqa %xmm2, %xmm0 -; SSSE3-NEXT: pxor %xmm11, %xmm0 -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: pcmpgtd %xmm11, %xmm3 -; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] -; SSSE3-NEXT: pcmpeqd %xmm11, %xmm0 -; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] -; SSSE3-NEXT: pand %xmm5, %xmm6 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] -; SSSE3-NEXT: por %xmm6, %xmm0 +; SSSE3-NEXT: pand %xmm5, %xmm4 +; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] +; SSSE3-NEXT: por %xmm4, %xmm5 +; SSSE3-NEXT: pand %xmm3, %xmm5 +; SSSE3-NEXT: movdqa %xmm2, %xmm3 +; SSSE3-NEXT: pxor %xmm0, %xmm3 +; SSSE3-NEXT: movdqa %xmm3, %xmm4 +; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4 +; SSSE3-NEXT: pcmpeqd %xmm0, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] +; SSSE3-NEXT: pand %xmm4, %xmm3 +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] +; SSSE3-NEXT: por %xmm3, %xmm0 ; SSSE3-NEXT: pand %xmm2, %xmm0 -; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm4[0,2] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: trunc_packus_v8i64_v8i32: diff --git a/llvm/test/CodeGen/X86/widen_cast-5.ll b/llvm/test/CodeGen/X86/widen_cast-5.ll --- a/llvm/test/CodeGen/X86/widen_cast-5.ll +++ b/llvm/test/CodeGen/X86/widen_cast-5.ll @@ -15,9 +15,9 @@ ; ; X64-LABEL: convert: ; X64: ## %bb.0: ## %entry -; X64-NEXT: movq %rsi, %xmm0 -; X64-NEXT: pxor {{.*}}(%rip), %xmm0 -; X64-NEXT: movq %xmm0, (%rdi) +; X64-NEXT: movabsq $140733193388287, %rax ## imm = 0x7FFF000000FF +; X64-NEXT: xorq %rsi, %rax +; X64-NEXT: movq %rax, (%rdi) ; X64-NEXT: retq entry: %conv = bitcast i64 %src to <2 x i32> diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll --- a/llvm/test/CodeGen/X86/xor.ll +++ b/llvm/test/CodeGen/X86/xor.ll @@ -394,14 +394,11 @@ define i32 @PR17487(i1 %tobool) { ; X86-LABEL: PR17487: ; X86: # %bb.0: -; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] -; X86-NEXT: pandn {{\.LCPI.*}}, %xmm0 -; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; X86-NEXT: movd %xmm0, %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: notb %cl 
 ; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl $1, %ecx
-; X86-NEXT: setne %al
+; X86-NEXT: testb $1, %cl
+; X86-NEXT: sete %al
 ; X86-NEXT: retl
 ;
 ; X64-LIN-LABEL: PR17487:
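
As a quick illustration of what the new integer-vector path in the combine enables, here is a hypothetical reduced case. The function name, the splat constant and the exact resulting instruction are illustrative assumptions and are not taken from the patch or its test files; the expectation with something like llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f is that the AND stays in the v16i32 domain (a vpandd on %x) instead of being bitcast into and out of the v8i64 domain:

; Hypothetical reduced case exercising
; bitcast(and(bitcast(X), C)) --> and(X, bitcast(C)) on a legal integer
; vector type: the logic op is written on <8 x i64>, but its operand and
; its only user are both <16 x i32>.
define <16 x i32> @bitcast_and_v16i32(<16 x i32> %x) {
  %b = bitcast <16 x i32> %x to <8 x i64>
  %a = and <8 x i64> %b, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %r = bitcast <8 x i64> %a to <16 x i32>
  ret <16 x i32> %r
}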