diff --git a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
@@ -0,0 +1,545 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,CHECK-NOBMI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2 | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,CHECK-NOBMI,CHECK-NOBMI-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+sse2 | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-BMI2-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx2 | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX2,CHECK-AVX512
+;
+; Exercises x86 codegen for integer compares in which one operand is a shift
+; of %x (or a rotate, written as fshl with both inputs equal to %x) and the
+; other operand is %x itself or %x masked with a constant. In the base cases
+; the mask is all-ones shifted by the same amount in the same direction, e.g.
+; (and i8 %x, 63) against (lshr i8 %x, 2). Functions whose name contains
+; "_fail" perturb that shape (different mask, ashr, ugt predicate, non-splat
+; shift amounts, ...); presumably they are negative cases for a later
+; combine -- TODO confirm against the follow-up patch.
+;
+declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
+declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+declare i16 @llvm.fshl.i16(i16, i16, i16)
+declare i16 @llvm.fshr.i16(i16, i16, i16)
+declare i8 @llvm.fshl.i8(i8, i8, i8)
+declare i8 @llvm.fshr.i8(i8, i8, i8)
+
+define i1 @shr_to_shl_eq_i8_s2(i8 %x) {
+; CHECK-LABEL: shr_to_shl_eq_i8_s2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andb $63, %al
+; CHECK-NEXT: shrb $2, %dil
+; CHECK-NEXT: cmpb %dil, %al
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %and = and i8 %x, 63
+  %shr = lshr i8 %x, 2
+  %r = icmp eq i8 %and, %shr
+  ret i1 %r
+}
+
+define i1 @shl_to_shr_ne_i8_s7(i8 %x) {
+; CHECK-LABEL: shl_to_shr_ne_i8_s7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shlb $7, %al
+; CHECK-NEXT: andb $-128, %dil
+; CHECK-NEXT: cmpb %dil, %al
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+  %shl = shl i8 %x, 7
+  %and = and i8 %x, 128
+  %r = icmp ne i8 %shl, %and
+  ret i1 %r
+}
+
+define i1 @rorl_to_srl_ne_i8_s5_fail(i8 %x) {
+; CHECK-LABEL: rorl_to_srl_ne_i8_s5_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: rolb $5, %al
+; CHECK-NEXT: cmpb %dil, %al
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+  %ror = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 5)
+  %r = icmp ne i8 %ror, %x
+  ret i1 %r
+}
+
+define i1 @shr_to_shl_eq_i8_s1(i8 %x) {
+; CHECK-LABEL: shr_to_shl_eq_i8_s1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andb $127, %al
+; CHECK-NEXT: shrb %dil
+; CHECK-NEXT: cmpb %dil, %al
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %and = and i8 %x, 127
+  %shr = lshr i8 %x, 1
+  %r = icmp eq i8 %and, %shr
+  ret i1 %r
+}
+
+define i1 @shr_to_shl_eq_i32_s3(i32 %x) {
+; CHECK-LABEL: shr_to_shl_eq_i32_s3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl $536870911, %eax # imm = 0x1FFFFFFF
+; CHECK-NEXT: shrl $3, %edi
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %and = and i32 %x, 536870911
+  %shr = lshr i32 %x, 3
+  %r = icmp eq i32 %and, %shr
+  ret i1 %r
+}
+
+define i1 @shl_to_shr_eq_i32_s3_fail(i32 %x) {
+; CHECK-LABEL: shl_to_shr_eq_i32_s3_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl $536870911, %eax # imm = 0x1FFFFFFF
+; CHECK-NEXT: shll $3, %edi
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %and = and i32 %x, 536870911
+  %shr = shl i32 %x, 3
+  %r = icmp eq i32 %and, %shr
+  ret i1 %r
+}
+
+define i1 @shl_to_shr_ne_i32_s16(i32 %x) {
+; CHECK-LABEL: shl_to_shr_ne_i32_s16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shll $16, %eax
+; CHECK-NEXT: andl $-65536, %edi # imm = 0xFFFF0000
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+  %shl = shl i32 %x, 16
+  %and = and i32 %x, 4294901760
+  %r = icmp ne i32 %shl, %and
+  ret i1 %r
+}
+
+define i1 @shl_to_shr_ne_i32_s16_fail(i32 %x) {
+; CHECK-LABEL: shl_to_shr_ne_i32_s16_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shll $16, %eax
+; CHECK-NEXT: andl $2147450880, %edi # imm = 0x7FFF8000
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+  %shl = shl i32 %x, 16
+  %and = and i32 %x, 2147450880
+  %r = icmp ne i32 %shl, %and
+  ret i1 %r
+}
+
+define i1 @shr_to_shl_eq_i16_s1(i16 %x) {
+; CHECK-LABEL: shr_to_shl_eq_i16_s1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: andl $32767, %edi # imm = 0x7FFF
+; CHECK-NEXT: shrl %eax
+; CHECK-NEXT: cmpw %ax, %di
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %and = and i16 %x, 32767
+  %shr = lshr i16 %x, 1
+  %r = icmp eq i16 %and, %shr
+  ret i1 %r
+}
+
+define i1 @shr_to_shl_eq_i16_s1_fail(i16 %x) {
+; CHECK-LABEL: shr_to_shl_eq_i16_s1_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: andl $32766, %edi # imm = 0x7FFE
+; CHECK-NEXT: shrl %eax
+; CHECK-NEXT: cmpw %ax, %di
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %and = and i16 %x, 32766
+  %shr = lshr i16 %x, 1
+  %r = icmp eq i16 %and, %shr
+  ret i1 %r
+}
+
+define i1 @shl_to_shr_eq_i64_s44(i64 %x) {
+; CHECK-LABEL: shl_to_shr_eq_i64_s44:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movabsq $-17592186044416, %rax # imm = 0xFFFFF00000000000
+; CHECK-NEXT: andq %rdi, %rax
+; CHECK-NEXT: shlq $44, %rdi
+; CHECK-NEXT: cmpq %rax, %rdi
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %shl = shl i64 %x, 44
+  %and = and i64 %x, 18446726481523507200
+  %r = icmp eq i64 %shl, %and
+  ret i1 %r
+}
+
+define i1 @shr_to_shl_ne_i64_s32(i64 %x) {
+; CHECK-LABEL: shr_to_shl_ne_i64_s32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shrq $32, %rdi
+; CHECK-NEXT: cmpq %rdi, %rax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+  %and = and i64 %x, 4294967295
+  %shr = lshr i64 %x, 32
+  %r = icmp ne i64 %and, %shr
+  ret i1 %r
+}
+
+define i1 @rorl_to_shl_eq_i64_s16(i64 %x) {
+; CHECK-NOBMI-LABEL: rorl_to_shl_eq_i64_s16:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movq %rdi, %rax
+; CHECK-NOBMI-NEXT: rolq $16, %rax
+; CHECK-NOBMI-NEXT: cmpq %rdi, %rax
+; CHECK-NOBMI-NEXT: sete %al
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-LABEL: rorl_to_shl_eq_i64_s16:
+; CHECK-BMI2: # %bb.0:
+; CHECK-BMI2-NEXT: rorxq $48, %rdi, %rax
+; CHECK-BMI2-NEXT: cmpq %rdi, %rax
+; CHECK-BMI2-NEXT: sete %al
+; CHECK-BMI2-NEXT: retq
+  %ror = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 16)
+  %r = icmp eq i64 %ror, %x
+  ret i1 %r
+}
+
+define i1 @ashr_to_shl_ne_i64_s32_fail(i64 %x) {
+; CHECK-LABEL: ashr_to_shl_ne_i64_s32_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: sarq $32, %rdi
+; CHECK-NEXT: cmpq %rdi, %rax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+  %and = and i64 %x, 4294967295
+  %shr = ashr i64 %x, 32
+  %r = icmp ne i64 %and, %shr
+  ret i1 %r
+}
+
+define i1 @shl_to_shr_eq_i64_s63(i64 %x) {
+; CHECK-LABEL: shl_to_shr_eq_i64_s63:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; CHECK-NEXT: andq %rdi, %rax
+; CHECK-NEXT: shlq $63, %rdi
+; CHECK-NEXT: cmpq %rax, %rdi
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+  %shl = shl i64 %x, 63
+  %and = and i64 %x, 9223372036854775808
+  %r = icmp eq i64 %shl, %and
+  ret i1 %r
+}
+
+define i1 @shl_to_shr_eq_i64_s63_fail(i64 %x) {
+; CHECK-LABEL: shl_to_shr_eq_i64_s63_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; CHECK-NEXT: andq %rdi, %rax
+; CHECK-NEXT: shlq $63, %rdi
+; CHECK-NEXT: cmpq %rax, %rdi
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: retq
+  %shl = shl i64 %x, 63
+  %and = and i64 %x, 9223372036854775808
+  %r = icmp ugt i64 %shl, %and
+  ret i1 %r
+}
+
+define i1 @shr_to_shl_eq_i64_s7(i64 %x) {
+; CHECK-NOBMI-LABEL: shr_to_shl_eq_i64_s7:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movabsq $144115188075855871, %rax # imm = 0x1FFFFFFFFFFFFFF
+; CHECK-NOBMI-NEXT: andq %rdi, %rax
+; CHECK-NOBMI-NEXT: shrq $7, %rdi
+; CHECK-NOBMI-NEXT: cmpq %rdi, %rax
+; CHECK-NOBMI-NEXT: sete %al
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-LABEL: shr_to_shl_eq_i64_s7:
+; CHECK-BMI2: # %bb.0:
+; CHECK-BMI2-NEXT: movb $57, %al
+; CHECK-BMI2-NEXT: bzhiq %rax, %rdi, %rax
+; CHECK-BMI2-NEXT: shrq $7, %rdi
+; CHECK-BMI2-NEXT: cmpq %rdi, %rax
+; CHECK-BMI2-NEXT: sete %al
+; CHECK-BMI2-NEXT: retq
+  %and = and i64 %x, 144115188075855871
+  %shr = lshr i64 %x, 7
+  %r = icmp eq i64 %and, %shr
+  ret i1 %r
+}
+
+define i1 @shl_to_shr_ne_i32_s24(i32 %x) {
+; CHECK-LABEL: shl_to_shr_ne_i32_s24:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shll $24, %eax
+; CHECK-NEXT: andl $-16777216, %edi # imm = 0xFF000000
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+  %shl = shl i32 %x, 24
+  %and = and i32 %x, 4278190080
+  %r = icmp ne i32 %shl, %and
+  ret i1 %r
+}
+
+define i1 @shr_to_shl_ne_i32_s24_fail(i32 %x) {
+; CHECK-LABEL: shr_to_shl_ne_i32_s24_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shrl $24, %eax
+; CHECK-NEXT: andl $-16777216, %edi # imm = 0xFF000000
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+  %shl = lshr i32 %x, 24
+  %and = and i32 %x, 4278190080
+  %r = icmp ne i32 %shl, %and
+  ret i1 %r
+}
+
+define i1 @shr_to_shl_ne_i32_s8(i32 %x) {
+; CHECK-LABEL: shr_to_shl_ne_i32_s8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl $16777215, %eax # imm = 0xFFFFFF
+; CHECK-NEXT: shrl $8, %edi
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+  %and = and i32 %x, 16777215
+  %shr = lshr i32 %x, 8
+  %r = icmp ne i32 %and, %shr
+  ret i1 %r
+}
+
+; Vector variants. NOTE(review): the constant vector operands of the shift and
+; `and` instructions below were missing from the copy of this patch under
+; review and have been restored. Shift amounts follow the asserted immediates
+; (psrld $4, pslld $8, ...); the i16 mask follows the asserted byte pattern
+; [0,255,...]. The i32 masks are loaded from the constant pool and are not
+; visible in the assertions, so they were chosen as all-ones shifted by the
+; common shift amount -- regenerate the assertions to confirm.
+define <4 x i1> @shr_to_ror_eq_4xi32_s4(<4 x i32> %x) {
+; CHECK-NOBMI-LABEL: shr_to_ror_eq_4xi32_s4:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NOBMI-NEXT: psrld $4, %xmm1
+; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: shr_to_ror_eq_4xi32_s4:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT: psrld $4, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: shr_to_ror_eq_4xi32_s4:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpsrld $4, %xmm0, %xmm1
+; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: retq
+  %shr = lshr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
+  %and = and <4 x i32> %x, <i32 268435455, i32 268435455, i32 268435455, i32 268435455>
+  %r = icmp ne <4 x i32> %shr, %and
+  ret <4 x i1> %r
+}
+
+define <4 x i1> @shl_to_ror_eq_4xi32_s8(<4 x i32> %x) {
+; CHECK-NOBMI-LABEL: shl_to_ror_eq_4xi32_s8:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NOBMI-NEXT: pslld $8, %xmm1
+; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: shl_to_ror_eq_4xi32_s8:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pslld $8, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: shl_to_ror_eq_4xi32_s8:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpslld $8, %xmm0, %xmm1
+; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: retq
+  %shr = shl <4 x i32> %x, <i32 8, i32 8, i32 8, i32 8>
+  %and = and <4 x i32> %x, <i32 4294967040, i32 4294967040, i32 4294967040, i32 4294967040>
+  %r = icmp ne <4 x i32> %shr, %and
+  ret <4 x i1> %r
+}
+
+define <4 x i1> @shl_to_ror_eq_4xi32_s7_fail_no_p2(<4 x i32> %x) {
+; CHECK-NOBMI-LABEL: shl_to_ror_eq_4xi32_s7_fail_no_p2:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NOBMI-NEXT: pslld $7, %xmm1
+; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: shl_to_ror_eq_4xi32_s7_fail_no_p2:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pslld $7, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: shl_to_ror_eq_4xi32_s7_fail_no_p2:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpslld $7, %xmm0, %xmm1
+; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: retq
+  %shr = shl <4 x i32> %x, <i32 7, i32 7, i32 7, i32 7>
+  %and = and <4 x i32> %x, <i32 4294967168, i32 4294967168, i32 4294967168, i32 4294967168>
+  %r = icmp ne <4 x i32> %shr, %and
+  ret <4 x i1> %r
+}
+
+define <4 x i1> @shr_to_ror_eq_4xi32_s4_fail_no_splat(<4 x i32> %x) {
+; CHECK-NOBMI-LABEL: shr_to_ror_eq_4xi32_s4_fail_no_splat:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NOBMI-NEXT: psrld $4, %xmm1
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NOBMI-NEXT: psrld $8, %xmm2
+; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[2,0]
+; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,0]
+; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: shr_to_ror_eq_4xi32_s4_fail_no_splat:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT: psrld $4, %xmm1
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-BMI2-SSE2-NEXT: psrld $8, %xmm2
+; CHECK-BMI2-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[2,0]
+; CHECK-BMI2-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,0]
+; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: shr_to_ror_eq_4xi32_s4_fail_no_splat:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: retq
+  %shr = lshr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 8>
+  %and = and <4 x i32> %x, <i32 268435455, i32 268435455, i32 268435455, i32 268435455>
+  %r = icmp ne <4 x i32> %shr, %and
+  ret <4 x i1> %r
+}
+
+define <16 x i1> @shl_to_ror_eq_16xi16_s8_fail_preserve_i16(<16 x i16> %x) {
+; CHECK-NOBMI-LABEL: shl_to_ror_eq_16xi16_s8_fail_preserve_i16:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NOBMI-NEXT: psllw $8, %xmm2
+; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NOBMI-NEXT: psllw $8, %xmm3
+; CHECK-NOBMI-NEXT: movdqa {{.*#+}} xmm4 = [0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255]
+; CHECK-NOBMI-NEXT: pand %xmm4, %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqw %xmm2, %xmm0
+; CHECK-NOBMI-NEXT: pand %xmm4, %xmm1
+; CHECK-NOBMI-NEXT: pcmpeqw %xmm3, %xmm1
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm1
+; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm0
+; CHECK-NOBMI-NEXT: packsswb %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: shl_to_ror_eq_16xi16_s8_fail_preserve_i16:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-BMI2-SSE2-NEXT: psllw $8, %xmm2
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, %xmm3
+; CHECK-BMI2-SSE2-NEXT: psllw $8, %xmm3
+; CHECK-BMI2-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255]
+; CHECK-BMI2-SSE2-NEXT: pand %xmm4, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqw %xmm2, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pand %xmm4, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pcmpeqw %xmm3, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm2, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm2, %xmm0
+; CHECK-BMI2-SSE2-NEXT: packsswb %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: shl_to_ror_eq_16xi16_s8_fail_preserve_i16:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpsllw $8, %ymm0, %ymm1
+; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; CHECK-AVX512-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
+; CHECK-AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; CHECK-AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vzeroupper
+; CHECK-AVX512-NEXT: retq
+  %shr = shl <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %and = and <16 x i16> %x, <i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280>
+  %r = icmp ne <16 x i16> %shr, %and
+  ret <16 x i1> %r
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-AVX2: {{.*}}
+; CHECK-NOBMI-SSE2: {{.*}}