diff --git a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll @@ -0,0 +1,1178 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 + +declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1) +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) +declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1) +declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1) + +define <4 x i1> @illegal_abs_unchanged(<4 x i8> %x) { +; AVX512-LABEL: illegal_abs_unchanged: +; AVX512: # %bb.0: +; AVX512-NEXT: vpabsb %xmm0, %xmm0 +; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX512-NEXT: retq +; +; AVX2-LABEL: illegal_abs_unchanged: +; AVX2: # %bb.0: +; AVX2-NEXT: vpabsb %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX2-NEXT: retq +; +; SSE41-LABEL: illegal_abs_unchanged: +; SSE41: # %bb.0: +; SSE41-NEXT: pabsb %xmm0, %xmm0 +; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; SSE41-NEXT: retq +; +; SSE2-LABEL: illegal_abs_unchanged: +; SSE2: # %bb.0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: psubb %xmm0, %xmm1 +; SSE2-NEXT: pminub %xmm1, %xmm0 +; 
SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: retq + %abs = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %x, i1 true) + %cmp = icmp eq <4 x i8> %abs, + ret <4 x i1> %cmp +} + +define <4 x i1> @illegal_abs_unchanged2(<4 x i8> %x) { +; AVX512-LABEL: illegal_abs_unchanged2: +; AVX512: # %bb.0: +; AVX512-NEXT: vpabsb %xmm0, %xmm0 +; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX512-NEXT: retq +; +; AVX2-LABEL: illegal_abs_unchanged2: +; AVX2: # %bb.0: +; AVX2-NEXT: vpabsb %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX2-NEXT: retq +; +; SSE41-LABEL: illegal_abs_unchanged2: +; SSE41: # %bb.0: +; SSE41-NEXT: pabsb %xmm0, %xmm0 +; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero +; SSE41-NEXT: retq +; +; SSE2-LABEL: illegal_abs_unchanged2: +; SSE2: # %bb.0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: psubb %xmm0, %xmm1 +; SSE2-NEXT: pminub %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: retq + %abs = call <4 
x i8> @llvm.abs.v4i8(<4 x i8> %x, i1 true) + %cmp = icmp ne <4 x i8> %abs, + ret <4 x i1> %cmp +} + +define <4 x i1> @illegal_abs_to_eq_or(<4 x i64> %x) { +; AVX512-LABEL: illegal_abs_to_eq_or: +; AVX512: # %bb.0: +; AVX512-NEXT: vpabsq %ymm0, %ymm0 +; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +; +; AVX2-LABEL: illegal_abs_to_eq_or: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1 +; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] +; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; SSE41-LABEL: illegal_abs_to_eq_or: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pxor %xmm3, %xmm3 +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: psubq %xmm0, %xmm4 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 +; SSE41-NEXT: psubq %xmm1, %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129] +; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 +; SSE41-NEXT: packssdw %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: illegal_abs_to_eq_or: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: psubq %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: psubq %xmm2, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = 
xmm1[1,0,3,2] +; SSE2-NEXT: pand %xmm1, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: packssdw %xmm3, %xmm0 +; SSE2-NEXT: retq + %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true) + %cmp = icmp eq <4 x i64> %abs, + ret <4 x i1> %cmp +} + +define <4 x i64> @illegal_abs_to_eq_or_sext(<4 x i64> %x) { +; AVX512-LABEL: illegal_abs_to_eq_or_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vpabsq %ymm0, %ymm0 +; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] +; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: retq +; +; AVX2-LABEL: illegal_abs_to_eq_or_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1 +; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] +; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: illegal_abs_to_eq_or_sext: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pxor %xmm3, %xmm3 +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: psubq %xmm1, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: psubq %xmm2, %xmm3 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129] +; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 +; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: illegal_abs_to_eq_or_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: psubq %xmm2, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: psubq %xmm2, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; 
SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,0,3,2] +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: retq + %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true) + %cmp = icmp eq <4 x i64> %abs, + %r = sext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %r +} + +define <4 x i1> @illegal_abs_to_ne_and(<4 x i64> %x) { +; AVX512-LABEL: illegal_abs_to_ne_and: +; AVX512: # %bb.0: +; AVX512-NEXT: vpabsq %ymm0, %ymm0 +; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +; +; AVX2-LABEL: illegal_abs_to_ne_and: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1 +; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] +; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; SSE41-LABEL: illegal_abs_to_ne_and: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pxor %xmm3, %xmm3 +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: psubq %xmm0, %xmm4 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 +; SSE41-NEXT: psubq %xmm1, %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129] +; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm3, %xmm3 +; SSE41-NEXT: pxor %xmm3, %xmm1 +; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 +; SSE41-NEXT: pxor %xmm3, %xmm2 +; SSE41-NEXT: packssdw %xmm1, %xmm2 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: illegal_abs_to_ne_and: 
+; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: psubq %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: psubq %xmm2, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2] +; SSE2-NEXT: pand %xmm1, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2] +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: packssdw %xmm3, %xmm0 +; SSE2-NEXT: retq + %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true) + %cmp = icmp ne <4 x i64> %abs, + ret <4 x i1> %cmp +} + +define <4 x i64> @illegal_abs_to_ne_and_sext(<4 x i64> %x) { +; AVX512-LABEL: illegal_abs_to_ne_and_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vpabsq %ymm0, %ymm0 +; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] +; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 +; AVX512-NEXT: retq +; +; AVX2-LABEL: illegal_abs_to_ne_and_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1 +; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] +; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: illegal_abs_to_ne_and_sext: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: pxor %xmm3, %xmm3 +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: psubq %xmm1, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 +; SSE41-NEXT: psubq %xmm2, %xmm3 
+; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129] +; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm3, %xmm3 +; SSE41-NEXT: pxor %xmm3, %xmm2 +; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 +; SSE41-NEXT: pxor %xmm3, %xmm1 +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: illegal_abs_to_ne_and_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: psubq %xmm2, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: psubq %xmm2, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,0,3,2] +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 +; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm3, %xmm1 +; SSE2-NEXT: retq + %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true) + %cmp = icmp ne <4 x i64> %abs, + %r = sext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %r +} + +define <4 x i1> @legal_abs_eq_unchanged(<4 x i32> %x) { +; AVX512-LABEL: legal_abs_eq_unchanged: +; AVX512: # %bb.0: +; AVX512-NEXT: vpabsd %xmm0, %xmm0 +; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq +; +; AVX2-LABEL: legal_abs_eq_unchanged: +; AVX2: # %bb.0: +; AVX2-NEXT: vpabsd %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: legal_abs_eq_unchanged: +; SSE41: # %bb.0: +; SSE41-NEXT: pabsd %xmm0, %xmm0 +; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 
+; SSE41-NEXT: retq +; +; SSE2-LABEL: legal_abs_eq_unchanged: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: psubd %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: retq + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) + %cmp = icmp eq <4 x i32> %abs, + ret <4 x i1> %cmp +} + +define <4 x i32> @legal_abs_eq_unchanged_sext(<4 x i32> %x) { +; AVX512-LABEL: legal_abs_eq_unchanged_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vpabsd %xmm0, %xmm0 +; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq +; +; AVX2-LABEL: legal_abs_eq_unchanged_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vpabsd %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: legal_abs_eq_unchanged_sext: +; SSE41: # %bb.0: +; SSE41-NEXT: pabsd %xmm0, %xmm0 +; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: legal_abs_eq_unchanged_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: psubd %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: retq + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) + %cmp = icmp eq <4 x i32> %abs, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i1> @legal_abs_ne_unchangedd(<4 x i32> %x) { +; AVX512-LABEL: legal_abs_ne_unchangedd: +; AVX512: # %bb.0: +; AVX512-NEXT: vpabsd %xmm0, %xmm0 +; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: retq +; +; AVX2-LABEL: legal_abs_ne_unchangedd: +; AVX2: # %bb.0: +; AVX2-NEXT: vpabsd %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd 
{{.*#+}} xmm1 = [129,129,129,129] +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: legal_abs_ne_unchangedd: +; SSE41: # %bb.0: +; SSE41-NEXT: pabsd %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: legal_abs_ne_unchangedd: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: psubd %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) + %cmp = icmp ne <4 x i32> %abs, + ret <4 x i1> %cmp +} + +define <4 x i32> @legal_abs_ne_unchangedd_sext(<4 x i32> %x) { +; AVX512-LABEL: legal_abs_ne_unchangedd_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vpabsd %xmm0, %xmm0 +; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: retq +; +; AVX2-LABEL: legal_abs_ne_unchangedd_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vpabsd %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: legal_abs_ne_unchangedd_sext: +; SSE41: # %bb.0: +; SSE41-NEXT: pabsd %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: legal_abs_ne_unchangedd_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: psubd %xmm1, %xmm0 +; SSE2-NEXT: 
pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: retq + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) + %cmp = icmp ne <4 x i32> %abs, + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i1> @eq_or_to_abs_vec4x64(<4 x i64> %x) { +; AVX512-LABEL: eq_or_to_abs_vec4x64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0 +; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 +; AVX512-NEXT: korw %k1, %k0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +; +; AVX2-LABEL: eq_or_to_abs_vec4x64: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] +; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487] +; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_to_abs_vec4x64: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm2, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 +; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487] +; SSE41-NEXT: pcmpeqq %xmm3, %xmm1 +; SSE41-NEXT: pcmpeqq %xmm3, %xmm0 +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE41-NEXT: orps %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_to_abs_vec4x64: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE2-NEXT: 
movdqa %xmm2, %xmm4 +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3] +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE2-NEXT: andps %xmm4, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487] +; SSE2-NEXT: pcmpeqd %xmm3, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm1[1,3] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE2-NEXT: andps %xmm3, %xmm0 +; SSE2-NEXT: orps %xmm2, %xmm0 +; SSE2-NEXT: retq + %cmp1 = icmp eq <4 x i64> %x, + %cmp2 = icmp eq <4 x i64> %x, + %cmp = or <4 x i1> %cmp1, %cmp2 + ret <4 x i1> %cmp +} + +define <4 x i64> @eq_or_to_abs_vec4x64_sext(<4 x i64> %x) { +; AVX512-LABEL: eq_or_to_abs_vec4x64_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k0 +; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 +; AVX512-NEXT: korw %k1, %k0, %k1 +; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX2-LABEL: eq_or_to_abs_vec4x64_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] +; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487] +; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_to_abs_vec4x64_sext: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm2, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 +; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487] +; SSE41-NEXT: pcmpeqq %xmm3, %xmm1 +; SSE41-NEXT: pcmpeqq %xmm3, %xmm0 +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE41-NEXT: 
orps %xmm2, %xmm0 +; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] +; SSE41-NEXT: psllq $63, %xmm0 +; SSE41-NEXT: psrad $31, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_to_abs_vec4x64_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm4 +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3] +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE2-NEXT: andps %xmm4, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487] +; SSE2-NEXT: pcmpeqd %xmm3, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm1[1,3] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE2-NEXT: andps %xmm3, %xmm0 +; SSE2-NEXT: orps %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,1,3,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: psllq $63, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] +; SSE2-NEXT: retq + %cmp1 = icmp eq <4 x i64> %x, + %cmp2 = icmp eq <4 x i64> %x, + %cmp = or <4 x i1> %cmp1, %cmp2 + %r = sext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %r +} + +define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) { +; AVX512-LABEL: ne_and_to_abs_vec4x64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 +; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 {%k1} +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +; +; AVX2-LABEL: ne_and_to_abs_vec4x64: +; AVX2: # %bb.0: +; 
AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] +; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487] +; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; SSE41-LABEL: ne_and_to_abs_vec4x64: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm2, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 +; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE41-NEXT: pcmpeqd %xmm3, %xmm3 +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487] +; SSE41-NEXT: pcmpeqq %xmm4, %xmm1 +; SSE41-NEXT: pcmpeqq %xmm4, %xmm0 +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE41-NEXT: xorps %xmm3, %xmm0 +; SSE41-NEXT: andnps %xmm0, %xmm2 +; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: ne_and_to_abs_vec4x64: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm4 +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3] +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE2-NEXT: andps %xmm4, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487] +; SSE2-NEXT: pcmpeqd %xmm4, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm4, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE2-NEXT: andps %xmm4, %xmm0 +; SSE2-NEXT: xorps %xmm3, %xmm0 +; SSE2-NEXT: andnps %xmm0, %xmm2 
+; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: retq + %cmp1 = icmp ne <4 x i64> %x, + %cmp2 = icmp ne <4 x i64> %x, + %cmp = and <4 x i1> %cmp1, %cmp2 + ret <4 x i1> %cmp +} + +define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) { +; AVX512-LABEL: ne_and_to_abs_vec4x64_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 +; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 {%k1} +; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX2-LABEL: ne_and_to_abs_vec4x64_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] +; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1 +; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487] +; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: ne_and_to_abs_vec4x64_sext: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129] +; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm2, %xmm3 +; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 +; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE41-NEXT: pcmpeqd %xmm3, %xmm3 +; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487] +; SSE41-NEXT: pcmpeqq %xmm4, %xmm1 +; SSE41-NEXT: pcmpeqq %xmm4, %xmm0 +; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE41-NEXT: xorps %xmm3, %xmm0 +; SSE41-NEXT: andnps %xmm0, %xmm2 +; SSE41-NEXT: pmovsxdq %xmm2, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3] +; SSE41-NEXT: psllq $63, %xmm1 +; SSE41-NEXT: psrad $31, %xmm1 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE41-NEXT: retq +; +; SSE2-LABEL: ne_and_to_abs_vec4x64_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: movdqa 
{{.*#+}} xmm0 = [129,129] +; SSE2-NEXT: movdqa %xmm1, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm4 +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2] +; SSE2-NEXT: andps %xmm4, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487] +; SSE2-NEXT: pcmpeqd %xmm4, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm4, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm4 +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3] +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2] +; SSE2-NEXT: andps %xmm4, %xmm2 +; SSE2-NEXT: xorps %xmm3, %xmm2 +; SSE2-NEXT: andnps %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,1,3,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: psllq $63, %xmm2 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] +; SSE2-NEXT: retq + %cmp1 = icmp ne <4 x i64> %x, + %cmp2 = icmp ne <4 x i64> %x, + %cmp = and <4 x i1> %cmp1, %cmp2 + %r = sext <4 x i1> %cmp to <4 x i64> + ret <4 x i64> %r +} + +define <4 x i1> @eq_or_to_abs_vec4x32(<4 x i32> %x) { +; AVX512-LABEL: eq_or_to_abs_vec4x32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0 +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 +; AVX512-NEXT: korw %k1, %k0, %k1 +; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX2-LABEL: eq_or_to_abs_vec4x32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_to_abs_vec4x32: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} 
xmm1 = [1,1,1,1] +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_to_abs_vec4x32: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: retq + %cmp1 = icmp eq <4 x i32> %x, + %cmp2 = icmp eq <4 x i32> %x, + %cmp = or <4 x i1> %cmp1, %cmp2 + ret <4 x i1> %cmp +} + +define <4 x i32> @eq_or_to_abs_vec4x32_sext(<4 x i32> %x) { +; AVX512-LABEL: eq_or_to_abs_vec4x32_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k0 +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 +; AVX512-NEXT: korw %k1, %k0, %k1 +; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX2-LABEL: eq_or_to_abs_vec4x32_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_to_abs_vec4x32_sext: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1] +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_to_abs_vec4x32_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: retq + %cmp1 = icmp eq <4 x i32> %x, + %cmp2 = icmp eq <4 x i32> %x, + %cmp = or <4 x i1> %cmp1, %cmp2 + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i1> @ne_and_to_abs_vec4x32(<4 x 
i32> %x) { +; AVX512-LABEL: ne_and_to_abs_vec4x32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 +; AVX512-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 {%k1} +; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX2-LABEL: ne_and_to_abs_vec4x32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: ne_and_to_abs_vec4x32: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1] +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm2, %xmm0 +; SSE41-NEXT: pandn %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: ne_and_to_abs_vec4x32: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pandn %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: retq + %cmp1 = icmp ne <4 x i32> %x, + %cmp2 = icmp ne <4 x i32> %x, + %cmp = and <4 x i1> %cmp1, %cmp2 + ret <4 x i1> %cmp +} + +define <4 x i32> @ne_and_to_abs_vec4x32_sext(<4 x i32> %x) { +; AVX512-LABEL: ne_and_to_abs_vec4x32_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 +; AVX512-NEXT: vpcmpneqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 {%k1} +; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} +; AVX512-NEXT: retq +; +; AVX2-LABEL: ne_and_to_abs_vec4x32_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd 
%xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: ne_and_to_abs_vec4x32_sext: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1] +; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE41-NEXT: pxor %xmm2, %xmm0 +; SSE41-NEXT: pandn %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: ne_and_to_abs_vec4x32_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1] +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pandn %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: retq + %cmp1 = icmp ne <4 x i32> %x, + %cmp2 = icmp ne <4 x i32> %x, + %cmp = and <4 x i1> %cmp1, %cmp2 + %r = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %r +} + +define <4 x i1> @eq_or_to_abs_vec4x16(<4 x i16> %x) { +; AVX512-LABEL: eq_or_to_abs_vec4x16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512-NEXT: vpmovsxwd %xmm1, %ymm1 +; AVX512-NEXT: vptestmd %ymm1, %ymm1, %k0 +; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 +; AVX512-NEXT: vptestmd %ymm0, %ymm0, %k1 +; AVX512-NEXT: korw %k1, %k0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +; +; AVX2-LABEL: eq_or_to_abs_vec4x16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpmovsxwd %xmm1, %xmm1 +; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpmovsxwd %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_to_abs_vec4x16: +; SSE41: # %bb.0: +; SSE41-NEXT: 
movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u> +; SSE41-NEXT: pcmpeqw %xmm0, %xmm1 +; SSE41-NEXT: pmovsxwd %xmm1, %xmm1 +; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: eq_or_to_abs_vec4x16: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u> +; SSE2-NEXT: pcmpeqw %xmm0, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] +; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: retq + %cmp1 = icmp eq <4 x i16> %x, + %cmp2 = icmp eq <4 x i16> %x, + %cmp = or <4 x i1> %cmp1, %cmp2 + ret <4 x i1> %cmp +} + +define <4 x i8> @eq_or_to_abs_vec4x8_sext(<4 x i8> %x) { +; AVX512-LABEL: eq_or_to_abs_vec4x8_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512-NEXT: vptestmd %zmm1, %zmm1, %k0 +; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512-NEXT: korw %k1, %k0, %k1 +; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +; +; AVX2-LABEL: eq_or_to_abs_vec4x8_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: eq_or_to_abs_vec4x8_sext: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u> +; SSE41-NEXT: pcmpeqb %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: 
eq_or_to_abs_vec4x8_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u> +; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: retq + %cmp1 = icmp eq <4 x i8> %x, + %cmp2 = icmp eq <4 x i8> %x, + %cmp = or <4 x i1> %cmp1, %cmp2 + %r = sext <4 x i1> %cmp to <4 x i8> + ret <4 x i8> %r +} + +define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) { +; AVX512-LABEL: ne_and_to_abs_vec4x8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512-NEXT: vptestmd %zmm1, %zmm1, %k0 +; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512-NEXT: kandw %k1, %k0, %k1 +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +; +; AVX2-LABEL: ne_and_to_abs_vec4x8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpmovsxbd %xmm1, %xmm1 +; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpmovsxbd %xmm0, %xmm0 +; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: ne_and_to_abs_vec4x8: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u> +; SSE41-NEXT: pcmpeqb %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE41-NEXT: pxor %xmm2, %xmm1 +; SSE41-NEXT: pmovsxbd %xmm1, %xmm1 +; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pxor %xmm2, %xmm0 +; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 +; SSE41-NEXT: pand 
%xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: ne_and_to_abs_vec4x8: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u> +; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] +; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: psrad $24, %xmm0 +; SSE2-NEXT: retq + %cmp1 = icmp ne <4 x i8> %x, + %cmp2 = icmp ne <4 x i8> %x, + %cmp = and <4 x i1> %cmp1, %cmp2 + ret <4 x i1> %cmp +} + +define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) { +; AVX512-LABEL: ne_and_to_abs_vec4x16_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: vpmovsxwd %xmm1, %ymm1 +; AVX512-NEXT: vptestmd %ymm1, %ymm1, %k0 +; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 +; AVX512-NEXT: vptestmd %ymm0, %ymm0, %k1 +; AVX512-NEXT: kandw %k1, %k0, %k1 +; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX512-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +; +; AVX2-LABEL: ne_and_to_abs_vec4x16_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq +; +; SSE41-LABEL: ne_and_to_abs_vec4x16_sext: +; SSE41: # 
%bb.0: +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u> +; SSE41-NEXT: pcmpeqw %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pxor %xmm2, %xmm0 +; SSE41-NEXT: pandn %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; SSE2-LABEL: ne_and_to_abs_vec4x16_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u> +; SSE2-NEXT: pcmpeqw %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pandn %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: retq + %cmp1 = icmp ne <4 x i16> %x, + %cmp2 = icmp ne <4 x i16> %x, + %cmp = and <4 x i1> %cmp1, %cmp2 + %r = sext <4 x i1> %cmp to <4 x i16> + ret <4 x i16> %r +} diff --git a/llvm/test/CodeGen/X86/icmp-abs-C.ll b/llvm/test/CodeGen/X86/icmp-abs-C.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/icmp-abs-C.ll @@ -0,0 +1,222 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X86 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X64 + +declare i64 @llvm.abs.i64(i64, i1) +declare i32 @llvm.abs.i32(i32, i1) +declare i16 @llvm.abs.i16(i16, i1) +declare i8 @llvm.abs.i8(i8, i1) + +define i64 @eq_or_with_dom_abs(i64 %x) nounwind { +; X86-LABEL: eq_or_with_dom_abs: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: movl %esi, %edx +; X86-NEXT: xorl %edi, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: xorl %edi, %eax +; X86-NEXT: subl %edi, %eax +; X86-NEXT: sbbl %edi, %edx +; X86-NEXT: xorl $12312, %eax # imm = 0x3018 +; X86-NEXT: addl $64, %ecx +; 
X86-NEXT: adcl $0, %esi +; X86-NEXT: andl $-129, %ecx +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: orl %esi, %ecx +; X86-NEXT: sete %cl +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: movl $2344, %edi # imm = 0x928 +; X86-NEXT: cmpl %eax, %edi +; X86-NEXT: sbbl %edx, %esi +; X86-NEXT: jb .LBB0_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movb %cl, %bl +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: .LBB0_2: +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: eq_or_with_dom_abs: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: negq %rcx +; X64-NEXT: cmovsq %rdi, %rcx +; X64-NEXT: xorq $12312, %rcx # imm = 0x3018 +; X64-NEXT: addq $64, %rdi +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testq $-129, %rdi +; X64-NEXT: sete %al +; X64-NEXT: cmpq $2345, %rcx # imm = 0x929 +; X64-NEXT: cmovaeq %rcx, %rax +; X64-NEXT: retq + %absx = call i64 @llvm.abs.i64(i64 %x, i1 true) + %foo = xor i64 %absx, 12312 + %bar = icmp ugt i64 %foo, 2344 + %cmp0 = icmp eq i64 %x, 64 + %cmp1 = icmp eq i64 %x, -64 + %cmp = or i1 %cmp0, %cmp1 + %cmp64 = zext i1 %cmp to i64 + %r = select i1 %bar, i64 %foo, i64 %cmp64 + ret i64 %r +} + +define i32 @eq_or_with_dom_abs_non_po2(i32 %x) nounwind { +; X86-LABEL: eq_or_with_dom_abs_non_po2: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: xorl $12312, %eax # imm = 0x3018 +; X86-NEXT: cmpl $123, %edx +; X86-NEXT: sete %cl +; X86-NEXT: cmpl $-123, %edx +; X86-NEXT: sete %dl +; X86-NEXT: cmpl $2345, %eax # imm = 0x929 +; X86-NEXT: jae .LBB1_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: orb %dl, %cl +; X86-NEXT: movzbl %cl, %eax +; X86-NEXT: .LBB1_2: +; X86-NEXT: retl +; +; X64-LABEL: eq_or_with_dom_abs_non_po2: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: negl %ecx +; X64-NEXT: cmovsl %edi, %ecx +; X64-NEXT: xorl $12312, %ecx 
# imm = 0x3018 +; X64-NEXT: cmpl $123, %edi +; X64-NEXT: sete %al +; X64-NEXT: cmpl $-123, %edi +; X64-NEXT: sete %dl +; X64-NEXT: orb %al, %dl +; X64-NEXT: cmpl $2345, %ecx # imm = 0x929 +; X64-NEXT: movzbl %dl, %eax +; X64-NEXT: cmovael %ecx, %eax +; X64-NEXT: retq + %absx = call i32 @llvm.abs.i32(i32 %x, i1 true) + %foo = xor i32 %absx, 12312 + %bar = icmp ugt i32 %foo, 2344 + %cmp0 = icmp eq i32 %x, 123 + %cmp1 = icmp eq i32 %x, -123 + %cmp = or i1 %cmp0, %cmp1 + %cmp64 = zext i1 %cmp to i32 + %r = select i1 %bar, i32 %foo, i32 %cmp64 + ret i32 %r +} + +define i8 @ne_and_with_dom_abs_non_pow2(i8 %x) nounwind { +; X86-LABEL: ne_and_with_dom_abs_non_pow2: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: sarb $7, %cl +; X86-NEXT: movl %edx, %eax +; X86-NEXT: xorb %cl, %al +; X86-NEXT: subb %cl, %al +; X86-NEXT: xorb $12, %al +; X86-NEXT: cmpb $121, %dl +; X86-NEXT: setne %cl +; X86-NEXT: cmpb $-121, %dl +; X86-NEXT: setne %dl +; X86-NEXT: cmpb $24, %al +; X86-NEXT: jae .LBB2_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: andb %dl, %cl +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB2_2: +; X86-NEXT: retl +; +; X64-LABEL: ne_and_with_dom_abs_non_pow2: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: sarb $7, %al +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: xorb %al, %cl +; X64-NEXT: subb %al, %cl +; X64-NEXT: xorb $12, %cl +; X64-NEXT: cmpb $121, %dil +; X64-NEXT: setne %al +; X64-NEXT: cmpb $-121, %dil +; X64-NEXT: setne %dl +; X64-NEXT: andb %al, %dl +; X64-NEXT: cmpb $24, %cl +; X64-NEXT: movzbl %dl, %edx +; X64-NEXT: movzbl %cl, %eax +; X64-NEXT: cmovbl %edx, %eax +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq + %absx = call i8 @llvm.abs.i8(i8 %x, i1 true) + %foo = xor i8 %absx, 12 + %bar = icmp ugt i8 %foo, 23 + %cmp0 = icmp ne i8 %x, 121 + %cmp1 = icmp ne i8 %x, -121 + %cmp = and i1 %cmp0, %cmp1 + %cmp64 = zext i1 %cmp to i8 + %r = select i1 %bar, i8 %foo, i8 %cmp64 + ret i8 %r +} + 
+define i16 @ne_and_with_dom_abs(i16 %x) nounwind { ; i16 case: (x != 64) & (x != -64) evaluated next to an abs(x) that has its own uses
+; X86-LABEL: ne_and_with_dom_abs:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movswl %cx, %edx
+; X86-NEXT: sarl $15, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: subl %edx, %eax
+; X86-NEXT: xorl $12312, %eax # imm = 0x3018
+; X86-NEXT: movzwl %ax, %esi
+; X86-NEXT: addl $64, %ecx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: testl $65407, %ecx # imm = 0xFF7F
+; X86-NEXT: setne %cl
+; X86-NEXT: cmpl $2345, %esi # imm = 0x929
+; X86-NEXT: jae .LBB3_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movb %cl, %dl
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: .LBB3_2:
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: ne_and_with_dom_abs:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: negw %cx
+; X64-NEXT: cmovsw %di, %cx
+; X64-NEXT: xorl $12312, %ecx # imm = 0x3018
+; X64-NEXT: movzwl %cx, %edx
+; X64-NEXT: addl $64, %edi
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testl $65407, %edi # imm = 0xFF7F
+; X64-NEXT: setne %al
+; X64-NEXT: cmpl $2345, %edx # imm = 0x929
+; X64-NEXT: cmovael %ecx, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %absx = call i16 @llvm.abs.i16(i16 %x, i1 true) ; NOTE(review): i1 true should be the INT_MIN-is-poison flag per LangRef - confirm
+ %foo = xor i16 %absx, 12312 ; gives abs(x) a user that is independent of the compare pair below
+ %bar = icmp ugt i16 %foo, 2344 ; appears in the checks as a compare against 2345 with jae / cmovael
+ %cmp0 = icmp ne i16 %x, 64 ; x != 64
+ %cmp1 = icmp ne i16 %x, -64 ; x != -64
+ %cmp = and i1 %cmp0, %cmp1 ; checks on both targets expect the pair as a single add 64 / test 0xFF7F / setne sequence
+ %cmp64 = zext i1 %cmp to i16 ; name matches the i64 sibling test (eq_or_with_dom_abs); the zext here is to i16
+ %r = select i1 %bar, i16 %foo, i16 %cmp64 ; abs-derived value when %bar holds, otherwise the 0/1 compare result
+ ret i16 %r
+}