Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2765,7 +2765,7 @@
 }
   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
- // Only perform this optimization after type legalization and before
+ // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
@@ -2773,7 +2773,7 @@
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
-     Level == AfterLegalizeTypes) {
+     Level <= AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
Index: test/CodeGen/X86/avx512-mask-op.ll
===================================================================
--- test/CodeGen/X86/avx512-mask-op.ll
+++ test/CodeGen/X86/avx512-mask-op.ll
@@ -77,15 +77,33 @@
 define i16 @mand16(i16 %x, i16 %y) {
 ; CHECK-LABEL: mand16:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k0
-; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: xorl %esi, %eax
+; CHECK-NEXT: andl %esi, %edi
+; CHECK-NEXT: orl %eax, %edi
+; CHECK-NEXT: movw %di, %ax
+; CHECK-NEXT: retq
+ %ma = bitcast i16 %x to <16 x i1>
+ %mb = bitcast i16 %y to <16 x i1>
+ %mc = and <16 x i1> %ma, %mb
+ %md = xor <16 x i1> %ma, %mb
+ %me = or <16 x i1> %mc, %md
+ %ret = bitcast <16 x i1> %me to i16
+ ret i16 %ret
+}
+
+define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
+; CHECK-LABEL: mand16_mem:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw (%rdi), %k0
+; CHECK-NEXT: kmovw (%rsi), %k1
 ; CHECK-NEXT: kandw %k1, %k0, %k2
 ; CHECK-NEXT: kxorw %k1, %k0, %k0
 ; CHECK-NEXT: korw %k0, %k2, %k0
 ; CHECK-NEXT: kmovw %k0, %eax
 ; CHECK-NEXT: retq
- %ma = bitcast i16 %x to <16 x i1>
- %mb = bitcast i16 %y to <16 x i1>
+ %ma = load <16 x i1>, <16 x i1>* %x
+ %mb = load <16 x i1>, <16 x i1>* %y
 %mc = and <16 x i1> %ma, %mb
 %md = xor <16 x i1> %ma, %mb
 %me = or <16 x i1> %mc, %md
@@ -265,13 +283,13 @@
 ; KNL: ## BB#0:
 ; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
 ; KNL-NEXT: cmpl %esi, %edi
-; KNL-NEXT: jg LBB14_1
+; KNL-NEXT: jg LBB15_1
 ; KNL-NEXT: ## BB#2:
 ; KNL-NEXT: vpcmpltud %zmm2, %zmm1, %k1
-; KNL-NEXT: jmp LBB14_3
-; KNL-NEXT: LBB14_1:
+; KNL-NEXT: jmp LBB15_3
+; KNL-NEXT: LBB15_1:
 ; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
-; KNL-NEXT: LBB14_3:
+; KNL-NEXT: LBB15_3:
 ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
 ; KNL-NEXT: retq
@@ -280,12 +298,12 @@
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
 ; SKX-NEXT: cmpl %esi, %edi
-; SKX-NEXT: jg LBB14_1
+; SKX-NEXT: jg LBB15_1
 ; SKX-NEXT: ## BB#2:
 ; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0
 ; SKX-NEXT: vpmovm2b %k0, %xmm0
 ; SKX-NEXT: retq
-; SKX-NEXT: LBB14_1:
+; SKX-NEXT: LBB15_1:
 ; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
 ; SKX-NEXT: vpmovm2b %k0, %xmm0
 ; SKX-NEXT: retq
@@ -300,13 +318,13 @@
 ; KNL-LABEL: test9:
 ; KNL: ## BB#0:
 ; KNL-NEXT: cmpl %esi, %edi
-; KNL-NEXT: jg LBB15_1
+; KNL-NEXT: jg LBB16_1
 ; KNL-NEXT: ## BB#2:
 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
-; KNL-NEXT: jmp LBB15_3
-; KNL-NEXT: LBB15_1:
+; KNL-NEXT: jmp LBB16_3
+; KNL-NEXT: LBB16_1:
 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: LBB15_3:
+; KNL-NEXT: LBB16_3:
 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
 ; KNL-NEXT:
vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} @@ -316,13 +334,13 @@ ; SKX-LABEL: test9: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: jg LBB15_1 +; SKX-NEXT: jg LBB16_1 ; SKX-NEXT: ## BB#2: ; SKX-NEXT: vpsllw $7, %xmm1, %xmm0 -; SKX-NEXT: jmp LBB15_3 -; SKX-NEXT: LBB15_1: +; SKX-NEXT: jmp LBB16_3 +; SKX-NEXT: LBB16_1: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 -; SKX-NEXT: LBB15_3: +; SKX-NEXT: LBB16_3: ; SKX-NEXT: vpmovb2m %xmm0, %k0 ; SKX-NEXT: vpmovm2b %k0, %xmm0 ; SKX-NEXT: retq @@ -339,22 +357,22 @@ ; KNL-LABEL: test11: ; KNL: ## BB#0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: jg LBB17_2 +; KNL-NEXT: jg LBB18_2 ; KNL-NEXT: ## BB#1: ; KNL-NEXT: vmovaps %zmm1, %zmm0 -; KNL-NEXT: LBB17_2: +; KNL-NEXT: LBB18_2: ; KNL-NEXT: retq ; ; SKX-LABEL: test11: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: jg LBB17_1 +; SKX-NEXT: jg LBB18_1 ; SKX-NEXT: ## BB#2: ; SKX-NEXT: vpslld $31, %xmm1, %xmm0 -; SKX-NEXT: jmp LBB17_3 -; SKX-NEXT: LBB17_1: +; SKX-NEXT: jmp LBB18_3 +; SKX-NEXT: LBB18_1: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: LBB17_3: +; SKX-NEXT: LBB18_3: ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 ; SKX-NEXT: vpmovm2d %k0, %xmm0 ; SKX-NEXT: retq @@ -794,11 +812,11 @@ ; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb %al, %al -; KNL-NEXT: je LBB38_2 +; KNL-NEXT: je LBB39_2 ; KNL-NEXT: ## BB#1: ## %L1 ; KNL-NEXT: vmovapd %zmm0, (%rdi) ; KNL-NEXT: retq -; KNL-NEXT: LBB38_2: ## %L2 +; KNL-NEXT: LBB39_2: ## %L2 ; KNL-NEXT: vmovapd %zmm0, 8(%rdi) ; KNL-NEXT: retq ; @@ -809,11 +827,11 @@ ; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} ; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} ; SKX-NEXT: ktestb %k0, %k0 -; SKX-NEXT: je LBB38_2 +; SKX-NEXT: je LBB39_2 ; SKX-NEXT: ## BB#1: ## %L1 ; SKX-NEXT: vmovapd %zmm0, (%rdi) ; SKX-NEXT: retq -; SKX-NEXT: LBB38_2: ## %L2 +; SKX-NEXT: LBB39_2: ## %L2 ; SKX-NEXT: vmovapd %zmm0, 8(%rdi) ; SKX-NEXT: retq %addr1 = getelementptr double, double * %base, i64 0 @@ -859,12 +877,12 @@ ; SKX-NEXT: kunpckwd %k1, %k2, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 ; SKX-NEXT: ktestd %k0, %k0 -; SKX-NEXT: je LBB39_2 +; SKX-NEXT: je LBB40_2 ; SKX-NEXT: ## BB#1: ## %L1 ; SKX-NEXT: vmovaps %zmm0, (%rdi) ; SKX-NEXT: vmovaps %zmm1, 64(%rdi) ; SKX-NEXT: retq -; SKX-NEXT: LBB39_2: ## %L2 +; SKX-NEXT: LBB40_2: ## %L2 ; SKX-NEXT: vmovaps %zmm0, 4(%rdi) ; SKX-NEXT: vmovaps %zmm1, 68(%rdi) ; SKX-NEXT: retq Index: test/CodeGen/X86/avx512-select.ll =================================================================== --- test/CodeGen/X86/avx512-select.ll +++ test/CodeGen/X86/avx512-select.ll @@ -71,10 +71,8 @@ define i8 @select05(i8 %a.0, i8 %m) { ; CHECK-LABEL: select05: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k0 -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: orl %esi, %edi +; CHECK-NEXT: movb %dil, %al ; CHECK-NEXT: retq %mask = bitcast i8 %m to <8 x i1> %a = bitcast i8 %a.0 to <8 x i1> @@ -83,13 +81,28 @@ ret i8 %res; } +define i8 @select05_mem(<8 x i1>* %a.0, <8 x i1>* %m) { +; CHECK-LABEL: select05_mem: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbw (%rsi), %ax +; CHECK-NEXT: kmovw %eax, %k0 +; CHECK-NEXT: movzbw (%rdi), %ax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: korw %k1, %k0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: retq + %mask = load <8 x i1> , <8 x i1>* %m + %a = load <8 x i1> , <8 x i1>* %a.0 + %r = select <8 x i1> %mask, <8 x i1> , <8 x i1> %a + %res = bitcast <8 x i1> %r to i8 + ret i8 %res; +} + define i8 @select06(i8 %a.0, i8 %m) { ; CHECK-LABEL: select06: ; 
CHECK: ## BB#0: -; CHECK-NEXT: kmovw %esi, %k0 -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: andl %esi, %edi +; CHECK-NEXT: movb %dil, %al ; CHECK-NEXT: retq %mask = bitcast i8 %m to <8 x i1> %a = bitcast i8 %a.0 to <8 x i1> @@ -98,6 +111,22 @@ ret i8 %res; } +define i8 @select06_mem(<8 x i1>* %a.0, <8 x i1>* %m) { +; CHECK-LABEL: select06_mem: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbw (%rsi), %ax +; CHECK-NEXT: kmovw %eax, %k0 +; CHECK-NEXT: movzbw (%rdi), %ax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: kandw %k1, %k0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: retq + %mask = load <8 x i1> , <8 x i1>* %m + %a = load <8 x i1> , <8 x i1>* %a.0 + %r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> zeroinitializer + %res = bitcast <8 x i1> %r to i8 + ret i8 %res; +} define i8 @select07(i8 %a.0, i8 %b.0, i8 %m) { ; CHECK-LABEL: select07: ; CHECK: ## BB#0: Index: test/CodeGen/X86/avx512bw-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512bw-mask-op.ll +++ test/CodeGen/X86/avx512bw-mask-op.ll @@ -80,15 +80,33 @@ define i32 @mand32(i32 %x, i32 %y) { ; CHECK-LABEL: mand32: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: kmovd %esi, %k1 +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: xorl %esi, %edi +; CHECK-NEXT: orl %eax, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: retq + %ma = bitcast i32 %x to <32 x i1> + %mb = bitcast i32 %y to <32 x i1> + %mc = and <32 x i1> %ma, %mb + %md = xor <32 x i1> %ma, %mb + %me = or <32 x i1> %mc, %md + %ret = bitcast <32 x i1> %me to i32 + ret i32 %ret +} + +define i32 @mand32_mem(<32 x i1>* %x, <32 x i1>* %y) { +; CHECK-LABEL: mand32_mem: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovd (%rdi), %k0 +; CHECK-NEXT: kmovd (%rsi), %k1 ; CHECK-NEXT: kandd %k1, %k0, %k2 ; CHECK-NEXT: kxord %k1, %k0, %k0 ; CHECK-NEXT: kord %k0, %k2, %k0 ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: retq - %ma = bitcast i32 %x to <32 x i1> - %mb = bitcast i32 %y to <32 x i1> + %ma = load <32 x i1>, <32 x i1>* %x + %mb = load <32 x i1>, <32 x i1>* %y %mc = and <32 x i1> %ma, %mb %md = xor <32 x i1> %ma, %mb %me = or <32 x i1> %mc, %md @@ -99,15 +117,33 @@ define i64 @mand64(i64 %x, i64 %y) { ; CHECK-LABEL: mand64: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovq %rdi, %k0 -; CHECK-NEXT: kmovq %rsi, %k1 +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: andq %rsi, %rax +; CHECK-NEXT: xorq %rsi, %rdi +; CHECK-NEXT: orq %rax, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: retq + %ma = bitcast i64 %x to <64 x i1> + %mb = bitcast i64 %y to <64 x i1> + %mc = and <64 x i1> %ma, %mb + %md = xor <64 x i1> %ma, %mb + %me = or <64 x i1> %mc, %md + %ret = bitcast <64 x i1> %me to i64 + ret i64 %ret +} + +define i64 @mand64_mem(<64 x i1>* %x, <64 x i1>* %y) { +; CHECK-LABEL: mand64_mem: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovq (%rdi), %k0 +; CHECK-NEXT: kmovq (%rsi), %k1 ; CHECK-NEXT: kandq %k1, %k0, %k2 ; CHECK-NEXT: kxorq %k1, %k0, %k0 ; CHECK-NEXT: korq %k0, %k2, %k0 ; CHECK-NEXT: kmovq %k0, %rax ; CHECK-NEXT: retq - %ma = bitcast i64 %x to <64 x i1> - %mb = bitcast i64 %y to <64 x i1> + %ma = load <64 x i1>, <64 x i1>* %x + %mb = load <64 x i1>, <64 x i1>* %y %mc = and <64 x i1> %ma, %mb %md = xor <64 x i1> %ma, %mb %me = or <64 x i1> %mc, %md Index: test/CodeGen/X86/avx512dq-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512dq-mask-op.ll +++ test/CodeGen/X86/avx512dq-mask-op.ll @@ -32,15 
+32,33 @@ define i8 @mand8(i8 %x, i8 %y) { ; CHECK-LABEL: mand8: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovb %edi, %k0 -; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: xorl %esi, %eax +; CHECK-NEXT: andl %esi, %edi +; CHECK-NEXT: orl %eax, %edi +; CHECK-NEXT: movb %dil, %al +; CHECK-NEXT: retq + %ma = bitcast i8 %x to <8 x i1> + %mb = bitcast i8 %y to <8 x i1> + %mc = and <8 x i1> %ma, %mb + %md = xor <8 x i1> %ma, %mb + %me = or <8 x i1> %mc, %md + %ret = bitcast <8 x i1> %me to i8 + ret i8 %ret +} + +define i8 @mand8_mem(<8 x i1>* %x, <8 x i1>* %y) { +; CHECK-LABEL: mand8_mem: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb (%rdi), %k0 +; CHECK-NEXT: kmovb (%rsi), %k1 ; CHECK-NEXT: kandb %k1, %k0, %k2 ; CHECK-NEXT: kxorb %k1, %k0, %k0 ; CHECK-NEXT: korb %k0, %k2, %k0 ; CHECK-NEXT: kmovb %k0, %eax ; CHECK-NEXT: retq - %ma = bitcast i8 %x to <8 x i1> - %mb = bitcast i8 %y to <8 x i1> + %ma = load <8 x i1>, <8 x i1>* %x + %mb = load <8 x i1>, <8 x i1>* %y %mc = and <8 x i1> %ma, %mb %md = xor <8 x i1> %ma, %mb %me = or <8 x i1> %mc, %md Index: test/CodeGen/X86/vector-shuffle-128-v4.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v4.ll +++ test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -1869,48 +1869,34 @@ define <4 x float> @mask_v4f32_0127(<4 x float> %a, <4 x float> %b) { ; SSE2-LABEL: mask_v4f32_0127: ; SSE2: # BB#0: -; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 -; SSE2-NEXT: andps {{.*}}(%rip), %xmm1 -; SSE2-NEXT: orps %xmm1, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: mask_v4f32_0127: ; SSE3: # BB#0: -; SSE3-NEXT: andps {{.*}}(%rip), %xmm0 -; SSE3-NEXT: andps {{.*}}(%rip), %xmm1 -; SSE3-NEXT: orps %xmm1, %xmm0 +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] +; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: mask_v4f32_0127: ; SSSE3: # BB#0: -; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1 -; SSSE3-NEXT: orps %xmm1, %xmm0 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] +; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSSE3-NEXT: movaps %xmm1, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: mask_v4f32_0127: ; SSE41: # BB#0: -; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7] -; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5],xmm2[6,7] -; SSE41-NEXT: por %xmm2, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: mask_v4f32_0127: -; AVX1: # BB#0: -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7] -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7] -; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: mask_v4f32_0127: -; AVX2: # BB#0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3] -; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3] -; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: mask_v4f32_0127: +; AVX: # BB#0: +; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] +; AVX-NEXT: retq %1 = bitcast <4 x float> %a to <2 x i64> %2 = bitcast <4 x float> %b to <2 x i64> %3 = and <2 x i64> %1, @@ -1923,47 +1909,38 @@ define <4 x i32> @mask_v4i32_0127(<4 x i32> %a, <4 x i32> %b) { ; 
SSE2-LABEL: mask_v4i32_0127: ; SSE2: # BB#0: -; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 -; SSE2-NEXT: andps {{.*}}(%rip), %xmm1 -; SSE2-NEXT: orps %xmm1, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: mask_v4i32_0127: ; SSE3: # BB#0: -; SSE3-NEXT: andps {{.*}}(%rip), %xmm0 -; SSE3-NEXT: andps {{.*}}(%rip), %xmm1 -; SSE3-NEXT: orps %xmm1, %xmm0 +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] +; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: mask_v4i32_0127: ; SSSE3: # BB#0: -; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1 -; SSSE3-NEXT: orps %xmm1, %xmm0 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] +; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSSE3-NEXT: movaps %xmm1, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: mask_v4i32_0127: ; SSE41: # BB#0: -; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7] -; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5],xmm2[6,7] -; SSE41-NEXT: por %xmm2, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: mask_v4i32_0127: ; AVX1: # BB#0: -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7] -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7] -; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] ; AVX1-NEXT: retq ; ; AVX2-LABEL: mask_v4i32_0127: ; AVX2: # BB#0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3] -; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3] -; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] ; AVX2-NEXT: retq %1 = bitcast <4 x i32> %a to <2 x i64> %2 = bitcast <4 x i32> %b to <2 x i64> Index: test/CodeGen/X86/vector-shuffle-128-v8.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v8.ll +++ test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -2140,40 +2140,31 @@ define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) { ; SSE2-LABEL: mask_v8i16_012345ef: ; SSE2: # BB#0: -; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 -; SSE2-NEXT: andps {{.*}}(%rip), %xmm1 -; SSE2-NEXT: orps %xmm1, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: mask_v8i16_012345ef: ; SSSE3: # BB#0: -; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1 -; SSSE3-NEXT: orps %xmm1, %xmm0 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0] +; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0] +; SSSE3-NEXT: movaps %xmm1, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: mask_v8i16_012345ef: ; SSE41: # BB#0: -; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7] -; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5],xmm2[6,7] -; SSE41-NEXT: por %xmm2, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] ; SSE41-NEXT: retq ; ; AVX1-LABEL: mask_v8i16_012345ef: ; AVX1: # BB#0: -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7] -; AVX1-NEXT: 
vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7] -; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] ; AVX1-NEXT: retq ; ; AVX2-LABEL: mask_v8i16_012345ef: ; AVX2: # BB#0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3] -; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3] -; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] ; AVX2-NEXT: retq %1 = bitcast <8 x i16> %a to <2 x i64> %2 = bitcast <8 x i16> %b to <2 x i64> Index: test/CodeGen/X86/widen_bitops-0.ll =================================================================== --- test/CodeGen/X86/widen_bitops-0.ll +++ test/CodeGen/X86/widen_bitops-0.ll @@ -9,24 +9,14 @@ define i24 @and_i24_as_v3i8(i24 %a, i24 %b) nounwind { ; X32-SSE-LABEL: and_i24_as_v3i8: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: subl $12, %esp -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; X32-SSE-NEXT: pand %xmm0, %xmm1 -; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: addl $12, %esp +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: and_i24_as_v3i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movd %esi, %xmm0 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X64-SSE-NEXT: movd %edi, %xmm1 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero -; X64-SSE-NEXT: pand %xmm0, %xmm1 -; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X64-SSE-NEXT: movd %xmm1, %eax +; X64-SSE-NEXT: andl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -38,24 +28,14 @@ define i24 @xor_i24_as_v3i8(i24 %a, i24 %b) nounwind { ; X32-SSE-LABEL: xor_i24_as_v3i8: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: subl $12, %esp -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; X32-SSE-NEXT: pxor %xmm0, %xmm1 -; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: addl $12, %esp +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: xor_i24_as_v3i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movd %esi, %xmm0 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X64-SSE-NEXT: movd %edi, %xmm1 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero -; X64-SSE-NEXT: pxor %xmm0, %xmm1 -; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X64-SSE-NEXT: movd %xmm1, %eax +; X64-SSE-NEXT: xorl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 
%b to <3 x i8> @@ -67,24 +47,14 @@ define i24 @or_i24_as_v3i8(i24 %a, i24 %b) nounwind { ; X32-SSE-LABEL: or_i24_as_v3i8: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: subl $12, %esp -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; X32-SSE-NEXT: por %xmm0, %xmm1 -; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: addl $12, %esp +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: or_i24_as_v3i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movd %esi, %xmm0 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X64-SSE-NEXT: movd %edi, %xmm1 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero -; X64-SSE-NEXT: por %xmm0, %xmm1 -; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X64-SSE-NEXT: movd %xmm1, %eax +; X64-SSE-NEXT: orl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <3 x i8> %2 = bitcast i24 %b to <3 x i8> @@ -100,186 +70,14 @@ define i24 @and_i24_as_v8i3(i24 %a, i24 %b) nounwind { ; X32-SSE-LABEL: and_i24_as_v8i3: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %ebp -; X32-SSE-NEXT: movl %esp, %ebp -; X32-SSE-NEXT: andl $-8, %esp -; X32-SSE-NEXT: subl $24, %esp -; X32-SSE-NEXT: movl 12(%ebp), %eax -; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: shrl $16, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movl 8(%ebp), %eax -; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: shrl $16, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $3, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $7, %edx -; X32-SSE-NEXT: movd %edx, %xmm1 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $6, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $9, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1 -; X32-SSE-NEXT: shrl $15, %eax -; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1 -; X32-SSE-NEXT: pxor %xmm2, %xmm2 -; X32-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7] -; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $3, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $7, %edx -; X32-SSE-NEXT: movd %edx, %xmm0 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $6, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $9, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X32-SSE-NEXT: shrl $15, 
%eax -; X32-SSE-NEXT: pinsrw $5, %eax, %xmm0 -; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm2[6,7] -; X32-SSE-NEXT: pand %xmm1, %xmm0 -; X32-SSE-NEXT: pextrw $7, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $6, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $5, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $4, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $3, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $2, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $1, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movd %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X32-SSE-NEXT: shll $16, %ecx -; X32-SSE-NEXT: movzwl (%esp), %eax -; X32-SSE-NEXT: orl %ecx, %eax -; X32-SSE-NEXT: movl %ebp, %esp -; X32-SSE-NEXT: popl %ebp +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: and_i24_as_v8i3: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movw %si, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: shrl $16, %esi -; X64-SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movw %di, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: shrl $16, %edi -; X64-SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $3, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: movl %eax, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $6, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $9, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $12, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X64-SSE-NEXT: shrl $15, %eax -; X64-SSE-NEXT: movzwl %ax, %eax -; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0 -; X64-SSE-NEXT: xorl %eax, %eax -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0 -; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0 -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx -; X64-SSE-NEXT: movl %ecx, %edx -; X64-SSE-NEXT: shrl $3, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: movl %ecx, %esi -; X64-SSE-NEXT: andl $7, %esi -; X64-SSE-NEXT: movd %esi, %xmm1 -; X64-SSE-NEXT: pinsrw $1, %edx, %xmm1 -; X64-SSE-NEXT: movl %ecx, %edx -; X64-SSE-NEXT: shrl $6, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: pinsrw $2, %edx, %xmm1 -; X64-SSE-NEXT: movl %ecx, %edx -; X64-SSE-NEXT: shrl $9, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: pinsrw $3, %edx, %xmm1 -; X64-SSE-NEXT: movl %ecx, %edx -; X64-SSE-NEXT: shrl $12, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: pinsrw $4, %edx, %xmm1 -; X64-SSE-NEXT: shrl $15, %ecx -; X64-SSE-NEXT: movzwl %cx, %ecx -; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm1 -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1 -; X64-SSE-NEXT: pinsrw $7, %eax, %xmm1 -; X64-SSE-NEXT: pand %xmm0, %xmm1 -; X64-SSE-NEXT: pextrw $7, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; 
X64-SSE-NEXT: pextrw $6, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; X64-SSE-NEXT: shll $16, %ecx -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: orl %ecx, %eax +; X64-SSE-NEXT: andl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> @@ -291,186 +89,14 @@ define i24 @xor_i24_as_v8i3(i24 %a, i24 %b) nounwind { ; X32-SSE-LABEL: xor_i24_as_v8i3: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %ebp -; X32-SSE-NEXT: movl %esp, %ebp -; X32-SSE-NEXT: andl $-8, %esp -; X32-SSE-NEXT: subl $24, %esp -; X32-SSE-NEXT: movl 12(%ebp), %eax -; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: shrl $16, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movl 8(%ebp), %eax -; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: shrl $16, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $3, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $7, %edx -; X32-SSE-NEXT: movd %edx, %xmm1 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $6, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $9, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1 -; X32-SSE-NEXT: shrl $15, %eax -; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1 -; X32-SSE-NEXT: pxor %xmm2, %xmm2 -; X32-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7] -; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $3, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $7, %edx -; X32-SSE-NEXT: movd %edx, %xmm0 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $6, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $9, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X32-SSE-NEXT: shrl $15, %eax -; X32-SSE-NEXT: pinsrw $5, %eax, %xmm0 -; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm2[6,7] -; X32-SSE-NEXT: pxor %xmm1, %xmm0 -; X32-SSE-NEXT: pextrw $7, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $6, %xmm0, 
%eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $5, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $4, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $3, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $2, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $1, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movd %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X32-SSE-NEXT: shll $16, %ecx -; X32-SSE-NEXT: movzwl (%esp), %eax -; X32-SSE-NEXT: orl %ecx, %eax -; X32-SSE-NEXT: movl %ebp, %esp -; X32-SSE-NEXT: popl %ebp +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: xor_i24_as_v8i3: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movw %si, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: shrl $16, %esi -; X64-SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movw %di, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: shrl $16, %edi -; X64-SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $3, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: movl %eax, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $6, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $9, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $12, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X64-SSE-NEXT: shrl $15, %eax -; X64-SSE-NEXT: movzwl %ax, %eax -; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0 -; X64-SSE-NEXT: xorl %eax, %eax -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0 -; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0 -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx -; X64-SSE-NEXT: movl %ecx, %edx -; X64-SSE-NEXT: shrl $3, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: movl %ecx, %esi -; X64-SSE-NEXT: andl $7, %esi -; X64-SSE-NEXT: movd %esi, %xmm1 -; X64-SSE-NEXT: pinsrw $1, %edx, %xmm1 -; X64-SSE-NEXT: movl %ecx, %edx -; X64-SSE-NEXT: shrl $6, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: pinsrw $2, %edx, %xmm1 -; X64-SSE-NEXT: movl %ecx, %edx -; X64-SSE-NEXT: shrl $9, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: pinsrw $3, %edx, %xmm1 -; X64-SSE-NEXT: movl %ecx, %edx -; X64-SSE-NEXT: shrl $12, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: pinsrw $4, %edx, %xmm1 -; X64-SSE-NEXT: shrl $15, %ecx -; X64-SSE-NEXT: movzwl %cx, %ecx -; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm1 -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1 -; X64-SSE-NEXT: pinsrw $7, %eax, %xmm1 -; X64-SSE-NEXT: pxor %xmm0, %xmm1 -; X64-SSE-NEXT: pextrw $7, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm1, %eax -; X64-SSE-NEXT: andl $15, 
%eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; X64-SSE-NEXT: shll $16, %ecx -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: orl %ecx, %eax +; X64-SSE-NEXT: xorl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> @@ -482,186 +108,14 @@ define i24 @or_i24_as_v8i3(i24 %a, i24 %b) nounwind { ; X32-SSE-LABEL: or_i24_as_v8i3: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %ebp -; X32-SSE-NEXT: movl %esp, %ebp -; X32-SSE-NEXT: andl $-8, %esp -; X32-SSE-NEXT: subl $24, %esp -; X32-SSE-NEXT: movl 12(%ebp), %eax -; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: shrl $16, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movl 8(%ebp), %eax -; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: shrl $16, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $3, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $7, %edx -; X32-SSE-NEXT: movd %edx, %xmm1 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $6, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $9, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1 -; X32-SSE-NEXT: shrl $15, %eax -; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1 -; X32-SSE-NEXT: pxor %xmm2, %xmm2 -; X32-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7] -; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $3, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $7, %edx -; X32-SSE-NEXT: movd %edx, %xmm0 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $6, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $9, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X32-SSE-NEXT: shrl $15, %eax -; X32-SSE-NEXT: pinsrw $5, %eax, %xmm0 -; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm2[6,7] -; X32-SSE-NEXT: por %xmm1, %xmm0 -; X32-SSE-NEXT: pextrw $7, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $6, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $5, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $4, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: 
pextrw $3, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $2, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $1, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movd %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X32-SSE-NEXT: shll $16, %ecx -; X32-SSE-NEXT: movzwl (%esp), %eax -; X32-SSE-NEXT: orl %ecx, %eax -; X32-SSE-NEXT: movl %ebp, %esp -; X32-SSE-NEXT: popl %ebp +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: or_i24_as_v8i3: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movw %si, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: shrl $16, %esi -; X64-SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movw %di, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: shrl $16, %edi -; X64-SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $3, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: movl %eax, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $6, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $9, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $12, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X64-SSE-NEXT: shrl $15, %eax -; X64-SSE-NEXT: movzwl %ax, %eax -; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0 -; X64-SSE-NEXT: xorl %eax, %eax -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0 -; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0 -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx -; X64-SSE-NEXT: movl %ecx, %edx -; X64-SSE-NEXT: shrl $3, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: movl %ecx, %esi -; X64-SSE-NEXT: andl $7, %esi -; X64-SSE-NEXT: movd %esi, %xmm1 -; X64-SSE-NEXT: pinsrw $1, %edx, %xmm1 -; X64-SSE-NEXT: movl %ecx, %edx -; X64-SSE-NEXT: shrl $6, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: pinsrw $2, %edx, %xmm1 -; X64-SSE-NEXT: movl %ecx, %edx -; X64-SSE-NEXT: shrl $9, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: pinsrw $3, %edx, %xmm1 -; X64-SSE-NEXT: movl %ecx, %edx -; X64-SSE-NEXT: shrl $12, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: pinsrw $4, %edx, %xmm1 -; X64-SSE-NEXT: shrl $15, %ecx -; X64-SSE-NEXT: movzwl %cx, %ecx -; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm1 -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1 -; X64-SSE-NEXT: pinsrw $7, %eax, %xmm1 -; X64-SSE-NEXT: por %xmm0, %xmm1 -; X64-SSE-NEXT: pextrw $7, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: 
pextrw $1, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx -; X64-SSE-NEXT: shll $16, %ecx -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: orl %ecx, %eax +; X64-SSE-NEXT: orl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i24 %a to <8 x i3> %2 = bitcast i24 %b to <8 x i3> @@ -677,22 +131,16 @@ define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind { ; X32-SSE-LABEL: and_v3i8_as_i24: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: subl $12, %esp -; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: movd %xmm0, %eax ; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: movd %xmm0, %ecx -; X32-SSE-NEXT: andl %eax, %ecx -; X32-SSE-NEXT: movd %ecx, %xmm0 -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X32-SSE-NEXT: pextrb $0, %xmm0, %eax -; X32-SSE-NEXT: pextrb $4, %xmm0, %edx -; X32-SSE-NEXT: pextrb $8, %xmm0, %ecx -; X32-SSE-NEXT: addl $12, %esp +; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: pand %xmm0, %xmm1 +; X32-SSE-NEXT: pextrb $0, %xmm1, %eax +; X32-SSE-NEXT: pextrb $4, %xmm1, %edx +; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: and_v3i8_as_i24: @@ -700,20 +148,13 @@ ; X64-SSE-NEXT: movd %ecx, %xmm0 ; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0 ; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0 -; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,4,8,128,u,u,u,u,u,u,u,u,u,u,u,u> -; X64-SSE-NEXT: pshufb %xmm1, %xmm0 -; X64-SSE-NEXT: movd %xmm0, %eax -; X64-SSE-NEXT: movd %edi, %xmm0 -; X64-SSE-NEXT: pinsrd $1, %esi, %xmm0 -; X64-SSE-NEXT: pinsrd $2, %edx, %xmm0 -; X64-SSE-NEXT: pshufb %xmm1, %xmm0 -; X64-SSE-NEXT: movd %xmm0, %ecx -; X64-SSE-NEXT: andl %eax, %ecx -; X64-SSE-NEXT: movd %ecx, %xmm0 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X64-SSE-NEXT: pextrb $0, %xmm0, %eax -; X64-SSE-NEXT: pextrb $4, %xmm0, %edx -; X64-SSE-NEXT: pextrb $8, %xmm0, %ecx +; X64-SSE-NEXT: movd %edi, %xmm1 +; X64-SSE-NEXT: pinsrd $1, %esi, %xmm1 +; X64-SSE-NEXT: pinsrd $2, %edx, %xmm1 +; X64-SSE-NEXT: pand %xmm0, %xmm1 +; X64-SSE-NEXT: pextrb $0, %xmm1, %eax +; X64-SSE-NEXT: pextrb $4, %xmm1, %edx +; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx ; X64-SSE-NEXT: retq %1 = bitcast <3 x i8> %a to i24 %2 = bitcast <3 x i8> %b to i24 @@ -725,22 +166,16 @@ define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind { ; X32-SSE-LABEL: xor_v3i8_as_i24: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: subl $12, %esp -; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: movd %xmm0, %eax ; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $2, 
{{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: movd %xmm0, %ecx -; X32-SSE-NEXT: xorl %eax, %ecx -; X32-SSE-NEXT: movd %ecx, %xmm0 -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X32-SSE-NEXT: pextrb $0, %xmm0, %eax -; X32-SSE-NEXT: pextrb $4, %xmm0, %edx -; X32-SSE-NEXT: pextrb $8, %xmm0, %ecx -; X32-SSE-NEXT: addl $12, %esp +; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: pxor %xmm0, %xmm1 +; X32-SSE-NEXT: pextrb $0, %xmm1, %eax +; X32-SSE-NEXT: pextrb $4, %xmm1, %edx +; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: xor_v3i8_as_i24: @@ -748,20 +183,13 @@ ; X64-SSE-NEXT: movd %ecx, %xmm0 ; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0 ; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0 -; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,4,8,128,u,u,u,u,u,u,u,u,u,u,u,u> -; X64-SSE-NEXT: pshufb %xmm1, %xmm0 -; X64-SSE-NEXT: movd %xmm0, %eax -; X64-SSE-NEXT: movd %edi, %xmm0 -; X64-SSE-NEXT: pinsrd $1, %esi, %xmm0 -; X64-SSE-NEXT: pinsrd $2, %edx, %xmm0 -; X64-SSE-NEXT: pshufb %xmm1, %xmm0 -; X64-SSE-NEXT: movd %xmm0, %ecx -; X64-SSE-NEXT: xorl %eax, %ecx -; X64-SSE-NEXT: movd %ecx, %xmm0 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X64-SSE-NEXT: pextrb $0, %xmm0, %eax -; X64-SSE-NEXT: pextrb $4, %xmm0, %edx -; X64-SSE-NEXT: pextrb $8, %xmm0, %ecx +; X64-SSE-NEXT: movd %edi, %xmm1 +; X64-SSE-NEXT: pinsrd $1, %esi, %xmm1 +; X64-SSE-NEXT: pinsrd $2, %edx, %xmm1 +; X64-SSE-NEXT: pxor %xmm0, %xmm1 +; X64-SSE-NEXT: pextrb $0, %xmm1, %eax +; X64-SSE-NEXT: pextrb $4, %xmm1, %edx +; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx ; X64-SSE-NEXT: retq %1 = bitcast <3 x i8> %a to i24 %2 = bitcast <3 x i8> %b to i24 @@ -773,22 +201,16 @@ define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind { ; X32-SSE-LABEL: or_v3i8_as_i24: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: subl $12, %esp -; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: movd %xmm0, %eax ; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0 -; X32-SSE-NEXT: movd %xmm0, %ecx -; X32-SSE-NEXT: orl %eax, %ecx -; X32-SSE-NEXT: movd %ecx, %xmm0 -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X32-SSE-NEXT: pextrb $0, %xmm0, %eax -; X32-SSE-NEXT: pextrb $4, %xmm0, %edx -; X32-SSE-NEXT: pextrb $8, %xmm0, %ecx -; X32-SSE-NEXT: addl $12, %esp +; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0 +; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm1 +; X32-SSE-NEXT: por %xmm0, %xmm1 +; X32-SSE-NEXT: pextrb $0, %xmm1, %eax +; X32-SSE-NEXT: pextrb $4, %xmm1, %edx +; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: or_v3i8_as_i24: @@ -796,20 +218,13 @@ ; X64-SSE-NEXT: movd %ecx, %xmm0 ; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0 ; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0 -; X64-SSE-NEXT: 
movdqa {{.*#+}} xmm1 = <0,4,8,128,u,u,u,u,u,u,u,u,u,u,u,u> -; X64-SSE-NEXT: pshufb %xmm1, %xmm0 -; X64-SSE-NEXT: movd %xmm0, %eax -; X64-SSE-NEXT: movd %edi, %xmm0 -; X64-SSE-NEXT: pinsrd $1, %esi, %xmm0 -; X64-SSE-NEXT: pinsrd $2, %edx, %xmm0 -; X64-SSE-NEXT: pshufb %xmm1, %xmm0 -; X64-SSE-NEXT: movd %xmm0, %ecx -; X64-SSE-NEXT: orl %eax, %ecx -; X64-SSE-NEXT: movd %ecx, %xmm0 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X64-SSE-NEXT: pextrb $0, %xmm0, %eax -; X64-SSE-NEXT: pextrb $4, %xmm0, %edx -; X64-SSE-NEXT: pextrb $8, %xmm0, %ecx +; X64-SSE-NEXT: movd %edi, %xmm1 +; X64-SSE-NEXT: pinsrd $1, %esi, %xmm1 +; X64-SSE-NEXT: pinsrd $2, %edx, %xmm1 +; X64-SSE-NEXT: por %xmm0, %xmm1 +; X64-SSE-NEXT: pextrb $0, %xmm1, %eax +; X64-SSE-NEXT: pextrb $4, %xmm1, %edx +; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx ; X64-SSE-NEXT: retq %1 = bitcast <3 x i8> %a to i24 %2 = bitcast <3 x i8> %b to i24 @@ -825,186 +240,12 @@ define <8 x i3> @and_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind { ; X32-SSE-LABEL: and_v8i3_as_i24: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %ebp -; X32-SSE-NEXT: movl %esp, %ebp -; X32-SSE-NEXT: andl $-8, %esp -; X32-SSE-NEXT: subl $24, %esp -; X32-SSE-NEXT: pextrw $7, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $6, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $5, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $4, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $3, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $2, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $1, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $7, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $6, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $5, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $4, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $3, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $2, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $1, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movd %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: shll $16, %eax -; X32-SSE-NEXT: movzwl (%esp), %ecx -; X32-SSE-NEXT: orl %eax, %ecx -; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X32-SSE-NEXT: shll $16, %edx -; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: orl %edx, %eax -; X32-SSE-NEXT: andl %ecx, %eax -; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: shrl $16, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: movl 
%eax, %ecx -; X32-SSE-NEXT: shrl $3, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $7, %edx -; X32-SSE-NEXT: movd %edx, %xmm1 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $6, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $9, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1 -; X32-SSE-NEXT: shrl $15, %eax -; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1 -; X32-SSE-NEXT: pxor %xmm0, %xmm0 -; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] -; X32-SSE-NEXT: movl %ebp, %esp -; X32-SSE-NEXT: popl %ebp +; X32-SSE-NEXT: andps %xmm1, %xmm0 ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: and_v8i3_as_i24: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: pextrw $7, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $7, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: shll $16, %eax -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx -; X64-SSE-NEXT: orl %eax, %ecx -; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: shll $16, %eax -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx -; X64-SSE-NEXT: orl %eax, %edx -; X64-SSE-NEXT: andl %ecx, %edx -; X64-SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: shrl $16, %edx -; X64-SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $3, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: movl %eax, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: movd %edx, %xmm0 -; 
X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $6, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $9, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $12, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X64-SSE-NEXT: shrl $15, %eax -; X64-SSE-NEXT: movzwl %ax, %eax -; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0 -; X64-SSE-NEXT: xorl %eax, %eax -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0 -; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0 +; X64-SSE-NEXT: andps %xmm1, %xmm0 ; X64-SSE-NEXT: retq %1 = bitcast <8 x i3> %a to i24 %2 = bitcast <8 x i3> %b to i24 @@ -1016,186 +257,12 @@ define <8 x i3> @xor_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind { ; X32-SSE-LABEL: xor_v8i3_as_i24: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %ebp -; X32-SSE-NEXT: movl %esp, %ebp -; X32-SSE-NEXT: andl $-8, %esp -; X32-SSE-NEXT: subl $24, %esp -; X32-SSE-NEXT: pextrw $7, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $6, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $5, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $4, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $3, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $2, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $1, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $7, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $6, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $5, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $4, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $3, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $2, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $1, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movd %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: shll $16, %eax -; X32-SSE-NEXT: movzwl (%esp), %ecx -; X32-SSE-NEXT: orl %eax, %ecx -; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X32-SSE-NEXT: shll $16, %edx -; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: orl %edx, %eax -; X32-SSE-NEXT: xorl %ecx, %eax -; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: shrl $16, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $3, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $7, %edx -; X32-SSE-NEXT: movd %edx, %xmm1 -; X32-SSE-NEXT: pinsrw $1, %ecx, 
%xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $6, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $9, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1 -; X32-SSE-NEXT: shrl $15, %eax -; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1 -; X32-SSE-NEXT: pxor %xmm0, %xmm0 -; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] -; X32-SSE-NEXT: movl %ebp, %esp -; X32-SSE-NEXT: popl %ebp +; X32-SSE-NEXT: xorps %xmm1, %xmm0 ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: xor_v8i3_as_i24: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: pextrw $7, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $7, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: shll $16, %eax -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx -; X64-SSE-NEXT: orl %eax, %ecx -; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: shll $16, %eax -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx -; X64-SSE-NEXT: orl %eax, %edx -; X64-SSE-NEXT: xorl %ecx, %edx -; X64-SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: shrl $16, %edx -; X64-SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $3, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: movl %eax, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $6, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; 
X64-SSE-NEXT: shrl $9, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $12, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X64-SSE-NEXT: shrl $15, %eax -; X64-SSE-NEXT: movzwl %ax, %eax -; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0 -; X64-SSE-NEXT: xorl %eax, %eax -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0 -; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0 +; X64-SSE-NEXT: xorps %xmm1, %xmm0 ; X64-SSE-NEXT: retq %1 = bitcast <8 x i3> %a to i24 %2 = bitcast <8 x i3> %b to i24 @@ -1207,186 +274,12 @@ define <8 x i3> @or_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind { ; X32-SSE-LABEL: or_v8i3_as_i24: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %ebp -; X32-SSE-NEXT: movl %esp, %ebp -; X32-SSE-NEXT: andl $-8, %esp -; X32-SSE-NEXT: subl $24, %esp -; X32-SSE-NEXT: pextrw $7, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $6, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $5, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $4, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $3, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $2, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $1, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $7, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $6, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $5, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $4, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $3, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $2, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $1, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movd %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: shll $16, %eax -; X32-SSE-NEXT: movzwl (%esp), %ecx -; X32-SSE-NEXT: orl %eax, %ecx -; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X32-SSE-NEXT: shll $16, %edx -; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: orl %edx, %eax -; X32-SSE-NEXT: orl %ecx, %eax -; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: shrl $16, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $3, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $7, %edx -; X32-SSE-NEXT: movd %edx, %xmm1 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $6, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $9, %ecx -; 
X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $7, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1 -; X32-SSE-NEXT: shrl $15, %eax -; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1 -; X32-SSE-NEXT: pxor %xmm0, %xmm0 -; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7] -; X32-SSE-NEXT: movl %ebp, %esp -; X32-SSE-NEXT: popl %ebp +; X32-SSE-NEXT: orps %xmm1, %xmm0 ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: or_v8i3_as_i24: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: pextrw $7, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $7, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: shll $16, %eax -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx -; X64-SSE-NEXT: orl %eax, %ecx -; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: shll $16, %eax -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx -; X64-SSE-NEXT: orl %eax, %edx -; X64-SSE-NEXT: orl %ecx, %edx -; X64-SSE-NEXT: movw %dx, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: shrl $16, %edx -; X64-SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $3, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: movl %eax, %edx -; X64-SSE-NEXT: andl $7, %edx -; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $6, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $9, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $12, %ecx -; X64-SSE-NEXT: andl $7, %ecx -; X64-SSE-NEXT: pinsrw 
$4, %ecx, %xmm0 -; X64-SSE-NEXT: shrl $15, %eax -; X64-SSE-NEXT: movzwl %ax, %eax -; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0 -; X64-SSE-NEXT: xorl %eax, %eax -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0 -; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0 +; X64-SSE-NEXT: orps %xmm1, %xmm0 ; X64-SSE-NEXT: retq %1 = bitcast <8 x i3> %a to i24 %2 = bitcast <8 x i3> %b to i24 Index: test/CodeGen/X86/widen_bitops-1.ll =================================================================== --- test/CodeGen/X86/widen_bitops-1.ll +++ test/CodeGen/X86/widen_bitops-1.ll @@ -9,24 +9,14 @@ define i32 @and_i32_as_v4i8(i32 %a, i32 %b) nounwind { ; X32-SSE-LABEL: and_i32_as_v4i8: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %eax -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; X32-SSE-NEXT: pand %xmm0, %xmm1 -; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: popl %ecx +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: and_i32_as_v4i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movd %esi, %xmm0 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X64-SSE-NEXT: movd %edi, %xmm1 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero -; X64-SSE-NEXT: pand %xmm0, %xmm1 -; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X64-SSE-NEXT: movd %xmm1, %eax +; X64-SSE-NEXT: andl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -38,24 +28,14 @@ define i32 @xor_i32_as_v4i8(i32 %a, i32 %b) nounwind { ; X32-SSE-LABEL: xor_i32_as_v4i8: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %eax -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; X32-SSE-NEXT: pxor %xmm0, %xmm1 -; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: popl %ecx +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: xor_i32_as_v4i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movd %esi, %xmm0 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X64-SSE-NEXT: movd %edi, %xmm1 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero -; X64-SSE-NEXT: pxor %xmm0, %xmm1 -; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X64-SSE-NEXT: movd %xmm1, %eax +; X64-SSE-NEXT: xorl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -67,24 +47,14 @@ define i32 @or_i32_as_v4i8(i32 %a, i32 %b) nounwind { ; X32-SSE-LABEL: or_i32_as_v4i8: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %eax -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; X32-SSE-NEXT: por %xmm0, %xmm1 -; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: popl %ecx +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: or_i32_as_v4i8: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movd %esi, %xmm0 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X64-SSE-NEXT: movd %edi, %xmm1 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero -; X64-SSE-NEXT: por %xmm0, %xmm1 -; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] -; X64-SSE-NEXT: movd %xmm1, %eax +; X64-SSE-NEXT: orl %esi, %edi +; X64-SSE-NEXT: movl %edi, %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <4 x i8> %2 = bitcast i32 %b to <4 x i8> @@ -100,186 +70,14 @@ define i32 @and_i32_as_v8i4(i32 %a, i32 %b) nounwind { ; X32-SSE-LABEL: and_i32_as_v8i4: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %ebp -; X32-SSE-NEXT: movl %esp, %ebp -; X32-SSE-NEXT: andl $-8, %esp -; X32-SSE-NEXT: subl $24, %esp -; X32-SSE-NEXT: movl 12(%ebp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $4, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $15, %edx -; X32-SSE-NEXT: movd %edx, %xmm0 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $8, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $16, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $20, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $24, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0 -; X32-SSE-NEXT: shrl $28, %eax -; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0 -; X32-SSE-NEXT: movl 8(%ebp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $4, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $15, %edx -; X32-SSE-NEXT: movd %edx, %xmm1 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $8, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $16, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $20, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $24, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm1 -; X32-SSE-NEXT: shrl $28, %eax -; X32-SSE-NEXT: pinsrw $7, %eax, %xmm1 -; X32-SSE-NEXT: pand %xmm0, %xmm1 -; X32-SSE-NEXT: 
pextrw $7, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $6, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $5, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $4, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $3, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $2, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $1, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movl (%esp), %eax -; X32-SSE-NEXT: movl %ebp, %esp -; X32-SSE-NEXT: popl %ebp +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: andl {{[0-9]+}}(%esp), %eax ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: and_i32_as_v8i4: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $4, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movl %esi, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: movd %ecx, %xmm0 -; X64-SSE-NEXT: pinsrw $1, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $8, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $2, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $12, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $3, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $16, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $4, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $20, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $24, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0 -; X64-SSE-NEXT: shrl $28, %esi -; X64-SSE-NEXT: pinsrw $7, %esi, %xmm0 +; X64-SSE-NEXT: andl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $4, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movl %edi, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: movd %ecx, %xmm1 -; X64-SSE-NEXT: pinsrw $1, %eax, %xmm1 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $8, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $2, %eax, %xmm1 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $12, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $3, %eax, %xmm1 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $16, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $4, %eax, %xmm1 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $20, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $5, %eax, %xmm1 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $24, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1 -; X64-SSE-NEXT: shrl $28, %edi -; X64-SSE-NEXT: pinsrw $7, %edi, %xmm1 -; X64-SSE-NEXT: pand %xmm0, %xmm1 -; X64-SSE-NEXT: pextrw $7, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm1, %eax -; X64-SSE-NEXT: andl $15, 
%eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> @@ -291,186 +89,14 @@ define i32 @xor_i32_as_v8i4(i32 %a, i32 %b) nounwind { ; X32-SSE-LABEL: xor_i32_as_v8i4: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %ebp -; X32-SSE-NEXT: movl %esp, %ebp -; X32-SSE-NEXT: andl $-8, %esp -; X32-SSE-NEXT: subl $24, %esp -; X32-SSE-NEXT: movl 12(%ebp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $4, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $15, %edx -; X32-SSE-NEXT: movd %edx, %xmm0 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $8, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $16, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $20, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $24, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0 -; X32-SSE-NEXT: shrl $28, %eax -; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0 -; X32-SSE-NEXT: movl 8(%ebp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $4, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $15, %edx -; X32-SSE-NEXT: movd %edx, %xmm1 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $8, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $16, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $20, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $24, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm1 -; X32-SSE-NEXT: shrl $28, %eax -; X32-SSE-NEXT: pinsrw $7, %eax, %xmm1 -; X32-SSE-NEXT: pxor %xmm0, %xmm1 -; X32-SSE-NEXT: pextrw $7, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $6, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $5, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $4, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $3, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $2, %xmm1, 
%eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $1, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movl (%esp), %eax -; X32-SSE-NEXT: movl %ebp, %esp -; X32-SSE-NEXT: popl %ebp +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: xor_i32_as_v8i4: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $4, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movl %esi, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: movd %ecx, %xmm0 -; X64-SSE-NEXT: pinsrw $1, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $8, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $2, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $12, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $3, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $16, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $4, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $20, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $24, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0 -; X64-SSE-NEXT: shrl $28, %esi -; X64-SSE-NEXT: pinsrw $7, %esi, %xmm0 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $4, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movl %edi, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: movd %ecx, %xmm1 -; X64-SSE-NEXT: pinsrw $1, %eax, %xmm1 +; X64-SSE-NEXT: xorl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $8, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $2, %eax, %xmm1 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $12, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $3, %eax, %xmm1 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $16, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $4, %eax, %xmm1 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $20, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $5, %eax, %xmm1 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $24, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1 -; X64-SSE-NEXT: shrl $28, %edi -; X64-SSE-NEXT: pinsrw $7, %edi, %xmm1 -; X64-SSE-NEXT: pxor %xmm0, %xmm1 -; X64-SSE-NEXT: pextrw $7, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movl 
-{{[0-9]+}}(%rsp), %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> @@ -482,186 +108,14 @@ define i32 @or_i32_as_v8i4(i32 %a, i32 %b) nounwind { ; X32-SSE-LABEL: or_i32_as_v8i4: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %ebp -; X32-SSE-NEXT: movl %esp, %ebp -; X32-SSE-NEXT: andl $-8, %esp -; X32-SSE-NEXT: subl $24, %esp -; X32-SSE-NEXT: movl 12(%ebp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $4, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $15, %edx -; X32-SSE-NEXT: movd %edx, %xmm0 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $8, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $16, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $20, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $24, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0 -; X32-SSE-NEXT: shrl $28, %eax -; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0 -; X32-SSE-NEXT: movl 8(%ebp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $4, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $15, %edx -; X32-SSE-NEXT: movd %edx, %xmm1 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $8, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $16, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $20, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm1 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $24, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm1 -; X32-SSE-NEXT: shrl $28, %eax -; X32-SSE-NEXT: pinsrw $7, %eax, %xmm1 -; X32-SSE-NEXT: por %xmm0, %xmm1 -; X32-SSE-NEXT: pextrw $7, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $6, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $5, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $4, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $3, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $2, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $1, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movl (%esp), %eax -; X32-SSE-NEXT: movl %ebp, %esp -; X32-SSE-NEXT: popl %ebp +; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE-NEXT: orl {{[0-9]+}}(%esp), %eax ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: or_i32_as_v8i4: ; X64-SSE: # BB#0: 
-; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $4, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movl %esi, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: movd %ecx, %xmm0 -; X64-SSE-NEXT: pinsrw $1, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $8, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $2, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $12, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $3, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $16, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $4, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $20, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0 -; X64-SSE-NEXT: movl %esi, %eax -; X64-SSE-NEXT: shrl $24, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0 -; X64-SSE-NEXT: shrl $28, %esi -; X64-SSE-NEXT: pinsrw $7, %esi, %xmm0 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $4, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movl %edi, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: movd %ecx, %xmm1 -; X64-SSE-NEXT: pinsrw $1, %eax, %xmm1 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $8, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $2, %eax, %xmm1 +; X64-SSE-NEXT: orl %esi, %edi ; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $12, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $3, %eax, %xmm1 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $16, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $4, %eax, %xmm1 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $20, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $5, %eax, %xmm1 -; X64-SSE-NEXT: movl %edi, %eax -; X64-SSE-NEXT: shrl $24, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1 -; X64-SSE-NEXT: shrl $28, %edi -; X64-SSE-NEXT: pinsrw $7, %edi, %xmm1 -; X64-SSE-NEXT: por %xmm0, %xmm1 -; X64-SSE-NEXT: pextrw $7, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax ; X64-SSE-NEXT: retq %1 = bitcast i32 %a to <8 x i4> %2 = bitcast i32 %b to <8 x i4> @@ -677,28 +131,12 @@ define <4 x i8> @and_v4i8_as_i32(<4 x i8> %a, <4 x i8> %b) nounwind { ; X32-SSE-LABEL: and_v4i8_as_i32: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: subl $12, %esp -; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; X32-SSE-NEXT: pshufb %xmm2, %xmm1 -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: pshufb %xmm2, %xmm0 -; X32-SSE-NEXT: movd %xmm0, %ecx -; X32-SSE-NEXT: andl %eax, %ecx -; X32-SSE-NEXT: 
movd %ecx, %xmm0 -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X32-SSE-NEXT: addl $12, %esp +; X32-SSE-NEXT: andps %xmm1, %xmm0 ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: and_v4i8_as_i32: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; X64-SSE-NEXT: pshufb %xmm2, %xmm1 -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: pshufb %xmm2, %xmm0 -; X64-SSE-NEXT: movd %xmm0, %ecx -; X64-SSE-NEXT: andl %eax, %ecx -; X64-SSE-NEXT: movd %ecx, %xmm0 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; X64-SSE-NEXT: andps %xmm1, %xmm0 ; X64-SSE-NEXT: retq %1 = bitcast <4 x i8> %a to i32 %2 = bitcast <4 x i8> %b to i32 @@ -710,28 +148,12 @@ define <4 x i8> @xor_v4i8_as_i32(<4 x i8> %a, <4 x i8> %b) nounwind { ; X32-SSE-LABEL: xor_v4i8_as_i32: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: subl $12, %esp -; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; X32-SSE-NEXT: pshufb %xmm2, %xmm1 -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: pshufb %xmm2, %xmm0 -; X32-SSE-NEXT: movd %xmm0, %ecx -; X32-SSE-NEXT: xorl %eax, %ecx -; X32-SSE-NEXT: movd %ecx, %xmm0 -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X32-SSE-NEXT: addl $12, %esp +; X32-SSE-NEXT: xorps %xmm1, %xmm0 ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: xor_v4i8_as_i32: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; X64-SSE-NEXT: pshufb %xmm2, %xmm1 -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: pshufb %xmm2, %xmm0 -; X64-SSE-NEXT: movd %xmm0, %ecx -; X64-SSE-NEXT: xorl %eax, %ecx -; X64-SSE-NEXT: movd %ecx, %xmm0 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; X64-SSE-NEXT: xorps %xmm1, %xmm0 ; X64-SSE-NEXT: retq %1 = bitcast <4 x i8> %a to i32 %2 = bitcast <4 x i8> %b to i32 @@ -743,28 +165,12 @@ define <4 x i8> @or_v4i8_as_i32(<4 x i8> %a, <4 x i8> %b) nounwind { ; X32-SSE-LABEL: or_v4i8_as_i32: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: subl $12, %esp -; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; X32-SSE-NEXT: pshufb %xmm2, %xmm1 -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: pshufb %xmm2, %xmm0 -; X32-SSE-NEXT: movd %xmm0, %ecx -; X32-SSE-NEXT: orl %eax, %ecx -; X32-SSE-NEXT: movd %ecx, %xmm0 -; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X32-SSE-NEXT: addl $12, %esp +; X32-SSE-NEXT: orps %xmm1, %xmm0 ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: or_v4i8_as_i32: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u> -; X64-SSE-NEXT: pshufb %xmm2, %xmm1 -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: pshufb %xmm2, %xmm0 -; X64-SSE-NEXT: movd %xmm0, %ecx -; X64-SSE-NEXT: orl %eax, %ecx -; X64-SSE-NEXT: movd %ecx, %xmm0 -; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; X64-SSE-NEXT: orps %xmm1, %xmm0 ; X64-SSE-NEXT: retq %1 = bitcast <4 x i8> %a to i32 %2 = bitcast <4 x i8> %b to i32 @@ -780,174 +186,12 @@ define <8 x i4> @and_v8i4_as_i32(<8 x i4> %a, <8 x i4> %b) nounwind { ; X32-SSE-LABEL: and_v8i4_as_i32: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl 
%ebp -; X32-SSE-NEXT: movl %esp, %ebp -; X32-SSE-NEXT: andl $-8, %esp -; X32-SSE-NEXT: subl $24, %esp -; X32-SSE-NEXT: pextrw $7, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $6, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $5, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $4, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $3, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $2, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $1, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movd %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $7, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $6, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $5, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $4, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $3, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $2, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $1, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: andl (%esp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $4, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $15, %edx -; X32-SSE-NEXT: movd %edx, %xmm0 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $8, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $16, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $20, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $24, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0 -; X32-SSE-NEXT: shrl $28, %eax -; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0 -; X32-SSE-NEXT: movl %ebp, %esp -; X32-SSE-NEXT: popl %ebp +; X32-SSE-NEXT: andps %xmm1, %xmm0 ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: and_v8i4_as_i32: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: pextrw $7, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb 
%al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $7, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: andl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $4, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: movl %eax, %edx -; X64-SSE-NEXT: andl $15, %edx -; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $8, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $12, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $16, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $20, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $24, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $6, %ecx, %xmm0 -; X64-SSE-NEXT: shrl $28, %eax -; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0 +; X64-SSE-NEXT: andps %xmm1, %xmm0 ; X64-SSE-NEXT: retq %1 = bitcast <8 x i4> %a to i32 %2 = bitcast <8 x i4> %b to i32 @@ -959,174 +203,12 @@ define <8 x i4> @xor_v8i4_as_i32(<8 x i4> %a, <8 x i4> %b) nounwind { ; X32-SSE-LABEL: xor_v8i4_as_i32: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %ebp -; X32-SSE-NEXT: movl %esp, %ebp -; X32-SSE-NEXT: andl $-8, %esp -; X32-SSE-NEXT: subl $24, %esp -; X32-SSE-NEXT: pextrw $7, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $6, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $5, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $4, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $3, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $2, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; 
X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $1, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movd %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $7, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $6, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $5, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $4, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $3, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $2, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $1, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: xorl (%esp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $4, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $15, %edx -; X32-SSE-NEXT: movd %edx, %xmm0 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $8, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $16, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $20, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $24, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0 -; X32-SSE-NEXT: shrl $28, %eax -; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0 -; X32-SSE-NEXT: movl %ebp, %esp -; X32-SSE-NEXT: popl %ebp +; X32-SSE-NEXT: xorps %xmm1, %xmm0 ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: xor_v8i4_as_i32: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: pextrw $7, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $7, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm1, 
%eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: xorl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $4, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: movl %eax, %edx -; X64-SSE-NEXT: andl $15, %edx -; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $8, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $12, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $16, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $20, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $24, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $6, %ecx, %xmm0 -; X64-SSE-NEXT: shrl $28, %eax -; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0 +; X64-SSE-NEXT: xorps %xmm1, %xmm0 ; X64-SSE-NEXT: retq %1 = bitcast <8 x i4> %a to i32 %2 = bitcast <8 x i4> %b to i32 @@ -1138,174 +220,12 @@ define <8 x i4> @or_v8i4_as_i32(<8 x i4> %a, <8 x i4> %b) nounwind { ; X32-SSE-LABEL: or_v8i4_as_i32: ; X32-SSE: # BB#0: -; X32-SSE-NEXT: pushl %ebp -; X32-SSE-NEXT: movl %esp, %ebp -; X32-SSE-NEXT: andl $-8, %esp -; X32-SSE-NEXT: subl $24, %esp -; X32-SSE-NEXT: pextrw $7, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $6, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $5, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $4, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $3, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $2, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $1, %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: movd %xmm0, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: pextrw $7, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $6, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $5, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $4, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $3, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax 
-; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $2, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: pextrw $1, %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movd %xmm1, %eax -; X32-SSE-NEXT: andl $15, %eax -; X32-SSE-NEXT: movb %al, (%esp) -; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE-NEXT: orl (%esp), %eax -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $4, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: movl %eax, %edx -; X32-SSE-NEXT: andl $15, %edx -; X32-SSE-NEXT: movd %edx, %xmm0 -; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $8, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $12, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $16, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $20, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0 -; X32-SSE-NEXT: movl %eax, %ecx -; X32-SSE-NEXT: shrl $24, %ecx -; X32-SSE-NEXT: andl $15, %ecx -; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0 -; X32-SSE-NEXT: shrl $28, %eax -; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0 -; X32-SSE-NEXT: movl %ebp, %esp -; X32-SSE-NEXT: popl %ebp +; X32-SSE-NEXT: orps %xmm1, %xmm0 ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: or_v8i4_as_i32: ; X64-SSE: # BB#0: -; X64-SSE-NEXT: pextrw $7, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm0, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $7, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $6, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $5, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $4, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $3, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $2, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: pextrw $1, %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movd %xmm1, %eax -; X64-SSE-NEXT: andl $15, %eax -; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: orl -{{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: 
movl %eax, %ecx -; X64-SSE-NEXT: shrl $4, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: movl %eax, %edx -; X64-SSE-NEXT: andl $15, %edx -; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $8, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $12, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $16, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $20, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm0 -; X64-SSE-NEXT: movl %eax, %ecx -; X64-SSE-NEXT: shrl $24, %ecx -; X64-SSE-NEXT: andl $15, %ecx -; X64-SSE-NEXT: pinsrw $6, %ecx, %xmm0 -; X64-SSE-NEXT: shrl $28, %eax -; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0 +; X64-SSE-NEXT: orps %xmm1, %xmm0 ; X64-SSE-NEXT: retq %1 = bitcast <8 x i4> %a to i32 %2 = bitcast <8 x i4> %b to i32