Changeset View
Changeset View
Standalone View
Standalone View
test/CodeGen/X86/vector-compare-all_of.ll
Show First 20 Lines • Show All 914 Lines • ▼ Show 20 Lines | ; AVX512-NEXT: retq | ||||
%11 = extractelement <32 x i8> %10, i32 0 | %11 = extractelement <32 x i8> %10, i32 0 | ||||
ret i8 %11 | ret i8 %11 | ||||
} | } | ||||
define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) { | define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) { | ||||
; SSE-LABEL: bool_reduction_v2f64: | ; SSE-LABEL: bool_reduction_v2f64: | ||||
; SSE: # %bb.0: | ; SSE: # %bb.0: | ||||
; SSE-NEXT: cmpltpd %xmm0, %xmm1 | ; SSE-NEXT: cmpltpd %xmm0, %xmm1 | ||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] | ; SSE-NEXT: movmskpd %xmm1, %eax | ||||
; SSE-NEXT: pand %xmm1, %xmm0 | ; SSE-NEXT: cmpb $3, %al | ||||
; SSE-NEXT: pextrb $0, %xmm0, %eax | ; SSE-NEXT: sete %al | ||||
; SSE-NEXT: # kill: def $al killed $al killed $eax | |||||
; SSE-NEXT: retq | ; SSE-NEXT: retq | ||||
; | ; | ||||
; AVX-LABEL: bool_reduction_v2f64: | ; AVX-LABEL: bool_reduction_v2f64: | ||||
; AVX: # %bb.0: | ; AVX: # %bb.0: | ||||
; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 | ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 | ||||
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] | ; AVX-NEXT: vmovmskpd %xmm0, %eax | ||||
; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0 | ; AVX-NEXT: cmpb $3, %al | ||||
; AVX-NEXT: vpextrb $0, %xmm0, %eax | ; AVX-NEXT: sete %al | ||||
; AVX-NEXT: # kill: def $al killed $al killed $eax | |||||
; AVX-NEXT: retq | ; AVX-NEXT: retq | ||||
; | ; | ||||
; AVX512-LABEL: bool_reduction_v2f64: | ; AVX512-LABEL: bool_reduction_v2f64: | ||||
; AVX512: # %bb.0: | ; AVX512: # %bb.0: | ||||
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1 | ; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1 | ||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 | ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 | ||||
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} | ; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} | ||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] | ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] | ||||
; AVX512-NEXT: vpsllq $63, %xmm0, %xmm0 | ; AVX512-NEXT: vpsllq $63, %xmm0, %xmm0 | ||||
; AVX512-NEXT: vptestmq %xmm0, %xmm0, %k0 {%k1} | ; AVX512-NEXT: vptestmq %xmm0, %xmm0, %k0 {%k1} | ||||
; AVX512-NEXT: kmovd %k0, %eax | ; AVX512-NEXT: kmovd %k0, %eax | ||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax | ; AVX512-NEXT: # kill: def $al killed $al killed $eax | ||||
; AVX512-NEXT: retq | ; AVX512-NEXT: retq | ||||
%a = fcmp ogt <2 x double> %x, %y | %a = fcmp ogt <2 x double> %x, %y | ||||
%b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef> | %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 undef> | ||||
%c = and <2 x i1> %a, %b | %c = and <2 x i1> %a, %b | ||||
%d = extractelement <2 x i1> %c, i32 0 | %d = extractelement <2 x i1> %c, i32 0 | ||||
ret i1 %d | ret i1 %d | ||||
} | } | ||||
define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) { | define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) { | ||||
; SSE-LABEL: bool_reduction_v4f32: | ; SSE-LABEL: bool_reduction_v4f32: | ||||
; SSE: # %bb.0: | ; SSE: # %bb.0: | ||||
; SSE-NEXT: cmpeqps %xmm1, %xmm0 | ; SSE-NEXT: cmpeqps %xmm1, %xmm0 | ||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] | ; SSE-NEXT: movmskps %xmm0, %eax | ||||
; SSE-NEXT: pand %xmm0, %xmm1 | ; SSE-NEXT: cmpb $15, %al | ||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] | ; SSE-NEXT: sete %al | ||||
; SSE-NEXT: pand %xmm1, %xmm0 | |||||
; SSE-NEXT: pextrb $0, %xmm0, %eax | |||||
; SSE-NEXT: # kill: def $al killed $al killed $eax | |||||
; SSE-NEXT: retq | ; SSE-NEXT: retq | ||||
; | ; | ||||
; AVX-LABEL: bool_reduction_v4f32: | ; AVX-LABEL: bool_reduction_v4f32: | ||||
; AVX: # %bb.0: | ; AVX: # %bb.0: | ||||
; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 | ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 | ||||
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] | ; AVX-NEXT: vmovmskps %xmm0, %eax | ||||
; AVX-NEXT: vandpd %xmm0, %xmm1, %xmm0 | ; AVX-NEXT: cmpb $15, %al | ||||
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] | ; AVX-NEXT: sete %al | ||||
; AVX-NEXT: vandpd %xmm0, %xmm1, %xmm0 | |||||
; AVX-NEXT: vpextrb $0, %xmm0, %eax | |||||
; AVX-NEXT: # kill: def $al killed $al killed $eax | |||||
; AVX-NEXT: retq | ; AVX-NEXT: retq | ||||
; | ; | ||||
; AVX512-LABEL: bool_reduction_v4f32: | ; AVX512-LABEL: bool_reduction_v4f32: | ||||
; AVX512: # %bb.0: | ; AVX512: # %bb.0: | ||||
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k1 | ; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k1 | ||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 | ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 | ||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z} | ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z} | ||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] | ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] | ||||
Show All 15 Lines | ; AVX512-NEXT: retq | ||||
ret i1 %d | ret i1 %d | ||||
} | } | ||||
define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) { | define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) { | ||||
; SSE-LABEL: bool_reduction_v4f64: | ; SSE-LABEL: bool_reduction_v4f64: | ||||
; SSE: # %bb.0: | ; SSE: # %bb.0: | ||||
; SSE-NEXT: cmplepd %xmm1, %xmm3 | ; SSE-NEXT: cmplepd %xmm1, %xmm3 | ||||
; SSE-NEXT: cmplepd %xmm0, %xmm2 | ; SSE-NEXT: cmplepd %xmm0, %xmm2 | ||||
; SSE-NEXT: movapd %xmm2, %xmm0 | ; SSE-NEXT: packssdw %xmm3, %xmm2 | ||||
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2] | ; SSE-NEXT: movmskps %xmm2, %eax | ||||
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2],xmm2[0,2] | ; SSE-NEXT: cmpb $15, %al | ||||
; SSE-NEXT: andps %xmm0, %xmm3 | ; SSE-NEXT: sete %al | ||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3] | |||||
; SSE-NEXT: pand %xmm3, %xmm0 | |||||
; SSE-NEXT: pextrb $0, %xmm0, %eax | |||||
; SSE-NEXT: # kill: def $al killed $al killed $eax | |||||
; SSE-NEXT: retq | ; SSE-NEXT: retq | ||||
; | ; | ||||
; AVX-LABEL: bool_reduction_v4f64: | ; AVX-LABEL: bool_reduction_v4f64: | ||||
; AVX: # %bb.0: | ; AVX: # %bb.0: | ||||
; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0 | ; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0 | ||||
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 | ; AVX-NEXT: vmovmskpd %ymm0, %eax | ||||
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 | ; AVX-NEXT: cmpb $15, %al | ||||
; AVX-NEXT: vpackssdw %xmm0, %xmm1, %xmm0 | ; AVX-NEXT: sete %al | ||||
; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0 | |||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] | |||||
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 | |||||
; AVX-NEXT: vpextrb $0, %xmm0, %eax | |||||
; AVX-NEXT: # kill: def $al killed $al killed $eax | |||||
; AVX-NEXT: vzeroupper | ; AVX-NEXT: vzeroupper | ||||
; AVX-NEXT: retq | ; AVX-NEXT: retq | ||||
; | ; | ||||
; AVX512-LABEL: bool_reduction_v4f64: | ; AVX512-LABEL: bool_reduction_v4f64: | ||||
; AVX512: # %bb.0: | ; AVX512: # %bb.0: | ||||
; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k1 | ; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k1 | ||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 | ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 | ||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z} | ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z} | ||||
▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines | |||||
define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { | define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { | ||||
; SSE-LABEL: bool_reduction_v2i64: | ; SSE-LABEL: bool_reduction_v2i64: | ||||
; SSE: # %bb.0: | ; SSE: # %bb.0: | ||||
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] | ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] | ||||
; SSE-NEXT: pxor %xmm2, %xmm1 | ; SSE-NEXT: pxor %xmm2, %xmm1 | ||||
; SSE-NEXT: pxor %xmm2, %xmm0 | ; SSE-NEXT: pxor %xmm2, %xmm0 | ||||
; SSE-NEXT: pcmpgtq %xmm1, %xmm0 | ; SSE-NEXT: pcmpgtq %xmm1, %xmm0 | ||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] | ; SSE-NEXT: movmskpd %xmm0, %eax | ||||
; SSE-NEXT: pand %xmm0, %xmm1 | ; SSE-NEXT: cmpb $3, %al | ||||
; SSE-NEXT: pextrb $0, %xmm1, %eax | ; SSE-NEXT: sete %al | ||||
; SSE-NEXT: # kill: def $al killed $al killed $eax | |||||
; SSE-NEXT: retq | ; SSE-NEXT: retq | ||||
; | ; | ||||
; AVX-LABEL: bool_reduction_v2i64: | ; AVX-LABEL: bool_reduction_v2i64: | ||||
; AVX: # %bb.0: | ; AVX: # %bb.0: | ||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] | ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] | ||||
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 | ; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 | ||||
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 | ; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 | ||||
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 | ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 | ||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] | ; AVX-NEXT: vmovmskpd %xmm0, %eax | ||||
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 | ; AVX-NEXT: cmpb $3, %al | ||||
; AVX-NEXT: vpextrb $0, %xmm0, %eax | ; AVX-NEXT: sete %al | ||||
; AVX-NEXT: # kill: def $al killed $al killed $eax | |||||
; AVX-NEXT: retq | ; AVX-NEXT: retq | ||||
; | ; | ||||
; AVX512-LABEL: bool_reduction_v2i64: | ; AVX512-LABEL: bool_reduction_v2i64: | ||||
; AVX512: # %bb.0: | ; AVX512: # %bb.0: | ||||
; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1 | ; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1 | ||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 | ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 | ||||
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} | ; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} | ||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] | ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] | ||||
Show All 10 Lines | |||||
} | } | ||||
define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { | define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { | ||||
; SSE-LABEL: bool_reduction_v4i32: | ; SSE-LABEL: bool_reduction_v4i32: | ||||
; SSE: # %bb.0: | ; SSE: # %bb.0: | ||||
; SSE-NEXT: pcmpeqd %xmm1, %xmm0 | ; SSE-NEXT: pcmpeqd %xmm1, %xmm0 | ||||
; SSE-NEXT: pcmpeqd %xmm1, %xmm1 | ; SSE-NEXT: pcmpeqd %xmm1, %xmm1 | ||||
; SSE-NEXT: pxor %xmm0, %xmm1 | ; SSE-NEXT: pxor %xmm0, %xmm1 | ||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] | ; SSE-NEXT: movmskps %xmm1, %eax | ||||
; SSE-NEXT: pandn %xmm1, %xmm0 | ; SSE-NEXT: cmpb $15, %al | ||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] | ; SSE-NEXT: sete %al | ||||
; SSE-NEXT: pand %xmm0, %xmm1 | |||||
; SSE-NEXT: pextrb $0, %xmm1, %eax | |||||
; SSE-NEXT: # kill: def $al killed $al killed $eax | |||||
; SSE-NEXT: retq | ; SSE-NEXT: retq | ||||
; | ; | ||||
; AVX-LABEL: bool_reduction_v4i32: | ; AVX-LABEL: bool_reduction_v4i32: | ||||
; AVX: # %bb.0: | ; AVX: # %bb.0: | ||||
; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 | ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 | ||||
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 | ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 | ||||
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm1 | ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 | ||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] | ; AVX-NEXT: vmovmskps %xmm0, %eax | ||||
; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0 | ; AVX-NEXT: cmpb $15, %al | ||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] | ; AVX-NEXT: sete %al | ||||
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 | |||||
; AVX-NEXT: vpextrb $0, %xmm0, %eax | |||||
; AVX-NEXT: # kill: def $al killed $al killed $eax | |||||
; AVX-NEXT: retq | ; AVX-NEXT: retq | ||||
; | ; | ||||
; AVX512-LABEL: bool_reduction_v4i32: | ; AVX512-LABEL: bool_reduction_v4i32: | ||||
; AVX512: # %bb.0: | ; AVX512: # %bb.0: | ||||
; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 | ; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 | ||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 | ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 | ||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z} | ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z} | ||||
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] | ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] | ||||
Show All 14 Lines | ; AVX512-NEXT: retq | ||||
%d = extractelement <4 x i1> %c, i32 0 | %d = extractelement <4 x i1> %c, i32 0 | ||||
ret i1 %d | ret i1 %d | ||||
} | } | ||||
define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) { | define i1 @bool_reduction_v8i16(<8 x i16> %x, <8 x i16> %y) { | ||||
; SSE-LABEL: bool_reduction_v8i16: | ; SSE-LABEL: bool_reduction_v8i16: | ||||
; SSE: # %bb.0: | ; SSE: # %bb.0: | ||||
; SSE-NEXT: pcmpgtw %xmm0, %xmm1 | ; SSE-NEXT: pcmpgtw %xmm0, %xmm1 | ||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] | ; SSE-NEXT: packsswb %xmm0, %xmm1 | ||||
; SSE-NEXT: pand %xmm1, %xmm0 | ; SSE-NEXT: pmovmskb %xmm1, %eax | ||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] | ; SSE-NEXT: cmpb $-1, %al | ||||
; SSE-NEXT: pand %xmm0, %xmm1 | ; SSE-NEXT: sete %al | ||||
; SSE-NEXT: movdqa %xmm1, %xmm0 | |||||
; SSE-NEXT: psrld $16, %xmm0 | |||||
; SSE-NEXT: pand %xmm1, %xmm0 | |||||
; SSE-NEXT: pextrb $0, %xmm0, %eax | |||||
; SSE-NEXT: # kill: def $al killed $al killed $eax | |||||
; SSE-NEXT: retq | ; SSE-NEXT: retq | ||||
; | ; | ||||
; AVX-LABEL: bool_reduction_v8i16: | ; AVX-LABEL: bool_reduction_v8i16: | ||||
; AVX: # %bb.0: | ; AVX: # %bb.0: | ||||
; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 | ; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 | ||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] | ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 | ||||
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 | ; AVX-NEXT: vpmovmskb %xmm0, %eax | ||||
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] | ; AVX-NEXT: cmpb $-1, %al | ||||
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 | ; AVX-NEXT: sete %al | ||||
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 | |||||
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 | |||||
; AVX-NEXT: vpextrb $0, %xmm0, %eax | |||||
; AVX-NEXT: # kill: def $al killed $al killed $eax | |||||
; AVX-NEXT: retq | ; AVX-NEXT: retq | ||||
; | ; | ||||
; AVX512-LABEL: bool_reduction_v8i16: | ; AVX512-LABEL: bool_reduction_v8i16: | ||||
; AVX512: # %bb.0: | ; AVX512: # %bb.0: | ||||
; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k1 | ; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k1 | ||||
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 | ; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 | ||||
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z} | ; AVX512-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z} | ||||
; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1 | ; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1 | ||||
Show All 22 Lines | ; AVX512-NEXT: retq | ||||
ret i1 %e | ret i1 %e | ||||
} | } | ||||
define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) { | define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) { | ||||
; SSE-LABEL: bool_reduction_v16i8: | ; SSE-LABEL: bool_reduction_v16i8: | ||||
; SSE: # %bb.0: | ; SSE: # %bb.0: | ||||
; SSE-NEXT: pcmpgtb %xmm1, %xmm0 | ; SSE-NEXT: pcmpgtb %xmm1, %xmm0 | ||||
; SSE-NEXT: pmovmskb %xmm0, %eax | ; SSE-NEXT: pmovmskb %xmm0, %eax | ||||
; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF | ; SSE-NEXT: cmpw $-1, %ax | ||||
; SSE-NEXT: sete %al | ; SSE-NEXT: sete %al | ||||
; SSE-NEXT: negb %al | |||||
; SSE-NEXT: retq | ; SSE-NEXT: retq | ||||
; | ; | ||||
; AVX-LABEL: bool_reduction_v16i8: | ; AVX-LABEL: bool_reduction_v16i8: | ||||
; AVX: # %bb.0: | ; AVX: # %bb.0: | ||||
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 | ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 | ||||
; AVX-NEXT: vpmovmskb %xmm0, %eax | ; AVX-NEXT: vpmovmskb %xmm0, %eax | ||||
; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF | ; AVX-NEXT: cmpw $-1, %ax | ||||
; AVX-NEXT: sete %al | ; AVX-NEXT: sete %al | ||||
; AVX-NEXT: negb %al | |||||
; AVX-NEXT: retq | ; AVX-NEXT: retq | ||||
; | ; | ||||
; AVX512-LABEL: bool_reduction_v16i8: | ; AVX512-LABEL: bool_reduction_v16i8: | ||||
; AVX512: # %bb.0: | ; AVX512: # %bb.0: | ||||
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 | ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 | ||||
; AVX512-NEXT: kshiftrw $8, %k0, %k1 | ; AVX512-NEXT: kshiftrw $8, %k0, %k1 | ||||
; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} | ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} | ||||
; AVX512-NEXT: kshiftrw $4, %k0, %k1 | ; AVX512-NEXT: kshiftrw $4, %k0, %k1 | ||||
Show All 18 Lines | ; AVX512-NEXT: retq | ||||
ret i1 %f | ret i1 %f | ||||
} | } | ||||
define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) { | define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) { | ||||
; SSE-LABEL: bool_reduction_v4i64: | ; SSE-LABEL: bool_reduction_v4i64: | ||||
; SSE: # %bb.0: | ; SSE: # %bb.0: | ||||
; SSE-NEXT: pcmpgtq %xmm1, %xmm3 | ; SSE-NEXT: pcmpgtq %xmm1, %xmm3 | ||||
; SSE-NEXT: pcmpgtq %xmm0, %xmm2 | ; SSE-NEXT: pcmpgtq %xmm0, %xmm2 | ||||
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] | ; SSE-NEXT: packssdw %xmm3, %xmm2 | ||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] | ; SSE-NEXT: movmskps %xmm2, %eax | ||||
; SSE-NEXT: pand %xmm2, %xmm0 | ; SSE-NEXT: cmpb $15, %al | ||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] | ; SSE-NEXT: sete %al | ||||
; SSE-NEXT: pand %xmm0, %xmm1 | |||||
; SSE-NEXT: pextrb $0, %xmm1, %eax | |||||
; SSE-NEXT: # kill: def $al killed $al killed $eax | |||||
; SSE-NEXT: retq | ; SSE-NEXT: retq | ||||
; | ; | ||||
; AVX1-LABEL: bool_reduction_v4i64: | ; AVX1-LABEL: bool_reduction_v4i64: | ||||
; AVX1: # %bb.0: | ; AVX1: # %bb.0: | ||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 | ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 | ||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 | ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 | ||||
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 | ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 | ||||
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 | ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 | ||||
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm1 | ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | ||||
; AVX1-NEXT: vpackssdw %xmm0, %xmm2, %xmm0 | ; AVX1-NEXT: vmovmskpd %ymm0, %eax | ||||
spatel: I think the other path prefers to use packss and a 128-bit movmsk for AVX1 here, which could be a slight win since it avoids ymm? | |||||
RKSimon (author; unsubmitted, not done): Yes, a single packss to a movmskps is better - I think we can take some of the code from D59912 to tweak this. | |||||
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 | ; AVX1-NEXT: cmpb $15, %al | ||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] | ; AVX1-NEXT: sete %al | ||||
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 | |||||
; AVX1-NEXT: vpextrb $0, %xmm0, %eax | |||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax | |||||
; AVX1-NEXT: vzeroupper | ; AVX1-NEXT: vzeroupper | ||||
; AVX1-NEXT: retq | ; AVX1-NEXT: retq | ||||
; | ; | ||||
; AVX2-LABEL: bool_reduction_v4i64: | ; AVX2-LABEL: bool_reduction_v4i64: | ||||
; AVX2: # %bb.0: | ; AVX2: # %bb.0: | ||||
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 | ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 | ||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 | ; AVX2-NEXT: vmovmskpd %ymm0, %eax | ||||
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 | ; AVX2-NEXT: cmpb $15, %al | ||||
; AVX2-NEXT: vpackssdw %xmm0, %xmm1, %xmm0 | ; AVX2-NEXT: sete %al | ||||
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 | |||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] | |||||
; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 | |||||
; AVX2-NEXT: vpextrb $0, %xmm0, %eax | |||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax | |||||
; AVX2-NEXT: vzeroupper | ; AVX2-NEXT: vzeroupper | ||||
; AVX2-NEXT: retq | ; AVX2-NEXT: retq | ||||
; | ; | ||||
; AVX512-LABEL: bool_reduction_v4i64: | ; AVX512-LABEL: bool_reduction_v4i64: | ||||
; AVX512: # %bb.0: | ; AVX512: # %bb.0: | ||||
; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 | ; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 | ||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 | ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 | ||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z} | ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z} | ||||
Show All 20 Lines | |||||
define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) { | define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) { | ||||
; SSE-LABEL: bool_reduction_v8i32: | ; SSE-LABEL: bool_reduction_v8i32: | ||||
; SSE: # %bb.0: | ; SSE: # %bb.0: | ||||
; SSE-NEXT: pminud %xmm1, %xmm3 | ; SSE-NEXT: pminud %xmm1, %xmm3 | ||||
; SSE-NEXT: pcmpeqd %xmm1, %xmm3 | ; SSE-NEXT: pcmpeqd %xmm1, %xmm3 | ||||
; SSE-NEXT: pminud %xmm0, %xmm2 | ; SSE-NEXT: pminud %xmm0, %xmm2 | ||||
; SSE-NEXT: pcmpeqd %xmm0, %xmm2 | ; SSE-NEXT: pcmpeqd %xmm0, %xmm2 | ||||
; SSE-NEXT: packssdw %xmm3, %xmm2 | ; SSE-NEXT: packssdw %xmm3, %xmm2 | ||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] | ; SSE-NEXT: packsswb %xmm0, %xmm2 | ||||
; SSE-NEXT: pand %xmm2, %xmm0 | ; SSE-NEXT: pmovmskb %xmm2, %eax | ||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] | ; SSE-NEXT: cmpb $-1, %al | ||||
; SSE-NEXT: pand %xmm0, %xmm1 | ; SSE-NEXT: sete %al | ||||
; SSE-NEXT: movdqa %xmm1, %xmm0 | |||||
; SSE-NEXT: psrld $16, %xmm0 | |||||
; SSE-NEXT: pand %xmm1, %xmm0 | |||||
; SSE-NEXT: pextrb $0, %xmm0, %eax | |||||
; SSE-NEXT: # kill: def $al killed $al killed $eax | |||||
; SSE-NEXT: retq | ; SSE-NEXT: retq | ||||
; | ; | ||||
; AVX1-LABEL: bool_reduction_v8i32: | ; AVX1-LABEL: bool_reduction_v8i32: | ||||
; AVX1: # %bb.0: | ; AVX1: # %bb.0: | ||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | ||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | ||||
; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2 | ; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2 | ||||
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2 | ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2 | ||||
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1 | ; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1 | ||||
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 | ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 | ||||
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 | ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 | ||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] | ; AVX1-NEXT: vmovmskps %ymm0, %eax | ||||
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 | ; AVX1-NEXT: cmpb $-1, %al | ||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] | ; AVX1-NEXT: sete %al | ||||
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 | |||||
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 | |||||
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0 | |||||
; AVX1-NEXT: vpextrb $0, %xmm0, %eax | |||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax | |||||
; AVX1-NEXT: vzeroupper | ; AVX1-NEXT: vzeroupper | ||||
; AVX1-NEXT: retq | ; AVX1-NEXT: retq | ||||
; | ; | ||||
; AVX2-LABEL: bool_reduction_v8i32: | ; AVX2-LABEL: bool_reduction_v8i32: | ||||
; AVX2: # %bb.0: | ; AVX2: # %bb.0: | ||||
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1 | ; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1 | ||||
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 | ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 | ||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 | ; AVX2-NEXT: vmovmskps %ymm0, %eax | ||||
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 | ; AVX2-NEXT: cmpb $-1, %al | ||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] | ; AVX2-NEXT: sete %al | ||||
; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 | |||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] | |||||
; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 | |||||
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 | |||||
; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 | |||||
; AVX2-NEXT: vpextrb $0, %xmm0, %eax | |||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax | |||||
; AVX2-NEXT: vzeroupper | ; AVX2-NEXT: vzeroupper | ||||
; AVX2-NEXT: retq | ; AVX2-NEXT: retq | ||||
; | ; | ||||
; AVX512-LABEL: bool_reduction_v8i32: | ; AVX512-LABEL: bool_reduction_v8i32: | ||||
; AVX512: # %bb.0: | ; AVX512: # %bb.0: | ||||
; AVX512-NEXT: vpcmpleud %ymm1, %ymm0, %k1 | ; AVX512-NEXT: vpcmpleud %ymm1, %ymm0, %k1 | ||||
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 | ; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 | ||||
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z} | ; AVX512-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z} | ||||
Show All 25 Lines | |||||
define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) { | define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) { | ||||
; SSE-LABEL: bool_reduction_v16i16: | ; SSE-LABEL: bool_reduction_v16i16: | ||||
; SSE: # %bb.0: | ; SSE: # %bb.0: | ||||
; SSE-NEXT: pcmpeqw %xmm3, %xmm1 | ; SSE-NEXT: pcmpeqw %xmm3, %xmm1 | ||||
; SSE-NEXT: pcmpeqw %xmm2, %xmm0 | ; SSE-NEXT: pcmpeqw %xmm2, %xmm0 | ||||
; SSE-NEXT: packsswb %xmm1, %xmm0 | ; SSE-NEXT: packsswb %xmm1, %xmm0 | ||||
; SSE-NEXT: pmovmskb %xmm0, %eax | ; SSE-NEXT: pmovmskb %xmm0, %eax | ||||
; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF | ; SSE-NEXT: cmpw $-1, %ax | ||||
; SSE-NEXT: sete %al | ; SSE-NEXT: sete %al | ||||
; SSE-NEXT: negb %al | |||||
; SSE-NEXT: retq | ; SSE-NEXT: retq | ||||
; | ; | ||||
; AVX1-LABEL: bool_reduction_v16i16: | ; AVX1-LABEL: bool_reduction_v16i16: | ||||
; AVX1: # %bb.0: | ; AVX1: # %bb.0: | ||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | ||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | ||||
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2 | ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2 | ||||
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 | ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 | ||||
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 | ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 | ||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax | ; AVX1-NEXT: vpmovmskb %xmm0, %eax | ||||
; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF | ; AVX1-NEXT: cmpw $-1, %ax | ||||
; AVX1-NEXT: sete %al | ; AVX1-NEXT: sete %al | ||||
; AVX1-NEXT: negb %al | |||||
; AVX1-NEXT: vzeroupper | ; AVX1-NEXT: vzeroupper | ||||
; AVX1-NEXT: retq | ; AVX1-NEXT: retq | ||||
; | ; | ||||
; AVX2-LABEL: bool_reduction_v16i16: | ; AVX2-LABEL: bool_reduction_v16i16: | ||||
; AVX2: # %bb.0: | ; AVX2: # %bb.0: | ||||
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 | ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 | ||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 | ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 | ||||
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 | ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 | ||||
; AVX2-NEXT: vpmovmskb %xmm0, %eax | ; AVX2-NEXT: vpmovmskb %xmm0, %eax | ||||
; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF | ; AVX2-NEXT: cmpw $-1, %ax | ||||
; AVX2-NEXT: sete %al | ; AVX2-NEXT: sete %al | ||||
; AVX2-NEXT: negb %al | |||||
; AVX2-NEXT: vzeroupper | ; AVX2-NEXT: vzeroupper | ||||
; AVX2-NEXT: retq | ; AVX2-NEXT: retq | ||||
; | ; | ||||
; AVX512-LABEL: bool_reduction_v16i16: | ; AVX512-LABEL: bool_reduction_v16i16: | ||||
; AVX512: # %bb.0: | ; AVX512: # %bb.0: | ||||
; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 | ; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 | ||||
; AVX512-NEXT: kshiftrw $8, %k0, %k1 | ; AVX512-NEXT: kshiftrw $8, %k0, %k1 | ||||
; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} | ; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} | ||||
Show All 18 Lines | ; AVX512-NEXT: retq | ||||
%e = and <16 x i1> %s4, %d | %e = and <16 x i1> %s4, %d | ||||
%f = extractelement <16 x i1> %e, i32 0 | %f = extractelement <16 x i1> %e, i32 0 | ||||
ret i1 %f | ret i1 %f | ||||
} | } | ||||
define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) { | define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) { | ||||
; SSE-LABEL: bool_reduction_v32i8: | ; SSE-LABEL: bool_reduction_v32i8: | ||||
; SSE: # %bb.0: | ; SSE: # %bb.0: | ||||
; SSE-NEXT: pcmpeqb %xmm2, %xmm0 | |||||
; SSE-NEXT: pcmpeqb %xmm3, %xmm1 | ; SSE-NEXT: pcmpeqb %xmm3, %xmm1 | ||||
; SSE-NEXT: pand %xmm0, %xmm1 | ; SSE-NEXT: pcmpeqb %xmm2, %xmm0 | ||||
; SSE-NEXT: pmovmskb %xmm1, %eax | ; SSE-NEXT: pand %xmm1, %xmm0 | ||||
; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF | ; SSE-NEXT: pmovmskb %xmm0, %eax | ||||
; SSE-NEXT: cmpw $-1, %ax | |||||
; SSE-NEXT: sete %al | ; SSE-NEXT: sete %al | ||||
; SSE-NEXT: negb %al | |||||
; SSE-NEXT: retq | ; SSE-NEXT: retq | ||||
; | ; | ||||
; AVX1-LABEL: bool_reduction_v32i8: | ; AVX1-LABEL: bool_reduction_v32i8: | ||||
; AVX1: # %bb.0: | ; AVX1: # %bb.0: | ||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 | ||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 | ||||
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2 | ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2 | ||||
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 | ; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 | ||||
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 | ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 | ||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax | ; AVX1-NEXT: vpmovmskb %xmm0, %eax | ||||
; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF | ; AVX1-NEXT: cmpw $-1, %ax | ||||
; AVX1-NEXT: sete %al | ; AVX1-NEXT: sete %al | ||||
; AVX1-NEXT: negb %al | |||||
; AVX1-NEXT: vzeroupper | ; AVX1-NEXT: vzeroupper | ||||
; AVX1-NEXT: retq | ; AVX1-NEXT: retq | ||||
; | ; | ||||
; AVX2-LABEL: bool_reduction_v32i8: | ; AVX2-LABEL: bool_reduction_v32i8: | ||||
; AVX2: # %bb.0: | ; AVX2: # %bb.0: | ||||
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 | ; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 | ||||
; AVX2-NEXT: vpmovmskb %ymm0, %eax | ; AVX2-NEXT: vpmovmskb %ymm0, %eax | ||||
; AVX2-NEXT: cmpl $-1, %eax | ; AVX2-NEXT: cmpl $-1, %eax | ||||
; AVX2-NEXT: sete %al | ; AVX2-NEXT: sete %al | ||||
; AVX2-NEXT: negb %al | |||||
; AVX2-NEXT: vzeroupper | ; AVX2-NEXT: vzeroupper | ||||
; AVX2-NEXT: retq | ; AVX2-NEXT: retq | ||||
; | ; | ||||
; AVX512-LABEL: bool_reduction_v32i8: | ; AVX512-LABEL: bool_reduction_v32i8: | ||||
; AVX512: # %bb.0: | ; AVX512: # %bb.0: | ||||
; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 | ; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 | ||||
; AVX512-NEXT: kshiftrd $16, %k0, %k1 | ; AVX512-NEXT: kshiftrd $16, %k0, %k1 | ||||
; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} | ; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} | ||||
Show All 26 Lines |
spatel: I think the other path prefers to use packss and a 128-bit movmsk for AVX1 here, which could be a slight win since it avoids ymm?