diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -4244,6 +4244,9 @@ } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 let Predicates = [UseAVX] in { + def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))), + (VMOVDI2PDIrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR8:$src, sub_8bit)))>; def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), (VMOVDI2PDIrr GR32:$src)>; @@ -5353,8 +5356,13 @@ Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; } -let Predicates = [HasAVX, NoBWI] in +let Predicates = [HasAVX, NoBWI] in { defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG; + def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3), + (VPINSRBrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR8:$src2, sub_8bit), timm:$src3)>; +} + let Constraints = "$src1 = $dst" in defm PINSRB : SS41I_insert8<0x20, "pinsrb">; diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll --- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -718,8 +718,7 @@ ; ; X64-LABEL: test_mm256_insert_epi8: ; X64: # %bb.0: -; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm1 +; X64-NEXT: vpinsrb $4, %edi, %xmm0, %xmm1 ; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; X64-NEXT: retq %arg0 = bitcast <4 x i64> %a0 to <32 x i8> @@ -1421,8 +1420,8 @@ ; X86-LABEL: test_mm256_set_epi8: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: vmovd %ecx, %xmm0 +; X86-NEXT: vmovd %eax, %xmm0 +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 @@ -1453,8 +1452,8 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: vmovd %ecx, %xmm1 +; X86-NEXT: vmovd %eax, %xmm1 +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 @@ -1489,10 +1488,10 @@ ; ; X64-LABEL: test_mm256_set_epi8: ; X64: # %bb.0: -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: vmovd %eax, %xmm0 -; X64-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 +; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax @@ -1509,21 +1508,15 @@ ; X64-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; X64-NEXT: movzbl %r9b, %eax -; X64-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; X64-NEXT: movzbl %r8b, %eax -; X64-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; X64-NEXT: movzbl %cl, %eax -; X64-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; X64-NEXT: movzbl %dl, %eax -; X64-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; X64-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 +; X64-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 +; X64-NEXT: vpinsrb $12, %ecx, 
%xmm0, %xmm0 +; X64-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; X64-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 +; X64-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 +; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: vmovd %eax, %xmm1 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; X64-NEXT: vmovd %ecx, %xmm1 ; X64-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 @@ -1862,8 +1855,7 @@ ; ; X64-LABEL: test_mm256_set1_epi8: ; X64: # %bb.0: -; X64-NEXT: movzbl %dil, %eax -; X64-NEXT: vmovd %eax, %xmm0 +; X64-NEXT: vmovd %edi, %xmm0 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 @@ -2037,8 +2029,8 @@ ; X86-LABEL: test_mm256_setr_epi8: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: vmovd %ecx, %xmm0 +; X86-NEXT: vmovd %eax, %xmm0 +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 @@ -2069,8 +2061,8 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: vmovd %ecx, %xmm1 +; X86-NEXT: vmovd %eax, %xmm1 +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 @@ -2105,10 +2097,10 @@ ; ; X64-LABEL: test_mm256_setr_epi8: ; X64: # %bb.0: -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: vmovd %eax, %xmm0 -; X64-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 +; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax @@ -2137,18 +2129,12 @@ ; X64-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; X64-NEXT: movzbl %sil, %eax -; X64-NEXT: movzbl %dil, %esi -; X64-NEXT: vmovd %esi, %xmm1 -; X64-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 -; X64-NEXT: movzbl %dl, %eax -; X64-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 -; X64-NEXT: movzbl %cl, %eax -; X64-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 -; X64-NEXT: movzbl %r8b, %eax -; X64-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 -; X64-NEXT: movzbl %r9b, %eax -; X64-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 +; X64-NEXT: vmovd %edi, %xmm1 +; X64-NEXT: vpinsrb $1, %esi, %xmm1, %xmm1 +; X64-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1 +; X64-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 +; X64-NEXT: vpinsrb $4, %r8d, %xmm1, %xmm1 +; X64-NEXT: vpinsrb $5, %r9d, %xmm1, %xmm1 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -6209,31 +6209,69 @@ declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { -; CHECK-LABEL: test_cmp_q_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1] -; 
CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc0] -; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02] -; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd1,0x04] -; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd9,0x05] -; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xe1] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; CHECK-NEXT: movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0] -; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] -; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] -; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00] -; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] -; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; X86-LABEL: test_cmp_q_512: +; X86: ## %bb.0: +; X86-NEXT: pushl %ebx ## encoding: [0x53] +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %edi ## encoding: [0x57] +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: pushl %esi ## encoding: [0x56] +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: .cfi_offset %esi, -16 +; X86-NEXT: .cfi_offset %edi, -12 +; X86-NEXT: .cfi_offset %ebx, -8 +; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1] +; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; X86-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc0] +; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] +; X86-NEXT: vpcmpleq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc1,0x02] +; X86-NEXT: kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0] +; X86-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc1,0x04] +; X86-NEXT: kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0] +; X86-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc1,0x05] +; X86-NEXT: kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8] +; X86-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1] +; X86-NEXT: kmovw %k0, %ebx ## encoding: [0xc5,0xf8,0x93,0xd8] +; X86-NEXT: movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0] +; X86-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X86-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] +; X86-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02] +; X86-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04] +; X86-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x05] +; X86-NEXT: vpinsrb $6, 
%ebx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc3,0x06] +; X86-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00] +; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X86-NEXT: popl %esi ## encoding: [0x5e] +; X86-NEXT: popl %edi ## encoding: [0x5f] +; X86-NEXT: popl %ebx ## encoding: [0x5b] +; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_cmp_q_512: +; X64: ## %bb.0: +; X64-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1] +; X64-NEXT: kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0] +; X64-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc0] +; X64-NEXT: kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8] +; X64-NEXT: vpcmpleq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc1,0x02] +; X64-NEXT: kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0] +; X64-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc1,0x04] +; X64-NEXT: kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0] +; X64-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc1,0x05] +; X64-NEXT: kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8] +; X64-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1] +; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; X64-NEXT: movzbl %r8b, %ecx ## encoding: [0x41,0x0f,0xb6,0xc8] +; X64-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; X64-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ## encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x01] +; X64-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02] +; X64-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04] +; X64-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x05] +; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X64-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00] +; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1) @@ -6256,28 +6294,44 @@ define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { ; X86-LABEL: test_mask_cmp_q_512: ; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: pushl %ebp ## encoding: [0x55] +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %ebx ## encoding: [0x53] +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: pushl %edi ## encoding: [0x57] +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: pushl %esi ## encoding: [0x56] +; X86-NEXT: .cfi_def_cfa_offset 20 +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x14] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1] ; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; X86-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 {%k1} ## encoding: 
[0x62,0xf2,0xf5,0x49,0x37,0xc0] -; X86-NEXT: vpcmpleq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd1,0x02] -; X86-NEXT: vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04] -; X86-NEXT: vpcmpnltq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x05] -; X86-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9] ; X86-NEXT: kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0] +; X86-NEXT: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xc1,0x02] +; X86-NEXT: kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0] +; X86-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xc1,0x04] +; X86-NEXT: kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8] +; X86-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xc1,0x05] +; X86-NEXT: kmovw %k0, %ebx ## encoding: [0xc5,0xf8,0x93,0xd8] +; X86-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc1] +; X86-NEXT: kmovw %k0, %ebp ## encoding: [0xc5,0xf8,0x93,0xe8] ; X86-NEXT: movzbl %cl, %ecx ## encoding: [0x0f,0xb6,0xc9] ; X86-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; X86-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01] -; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca] -; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02] -; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] -; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04] -; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] -; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05] -; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9] -; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06] +; X86-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02] +; X86-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x04] +; X86-NEXT: vpinsrb $5, %ebx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc3,0x05] +; X86-NEXT: vpinsrb $6, %ebp, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc5,0x06] ; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X86-NEXT: popl %esi ## encoding: [0x5e] +; X86-NEXT: popl %edi ## encoding: [0x5f] +; X86-NEXT: popl %ebx ## encoding: [0x5b] +; X86-NEXT: popl %ebp ## encoding: [0x5d] ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl ## encoding: [0xc3] ; @@ -6285,24 +6339,24 @@ ; X64: ## %bb.0: ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1] -; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; X64-NEXT: kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0] ; X64-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xc0] -; X64-NEXT: vpcmpleq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd1,0x02] -; X64-NEXT: vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04] -; X64-NEXT: vpcmpnltq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x05] -; X64-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9] +; X64-NEXT: kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8] +; X64-NEXT: vpcmpleq %zmm1, 
%zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xc1,0x02] +; X64-NEXT: kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0] +; X64-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xc1,0x04] +; X64-NEXT: kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0] +; X64-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xc1,0x05] +; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; X64-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc1] ; X64-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; X64-NEXT: movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0] -; X64-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] -; X64-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] -; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] +; X64-NEXT: movzbl %r8b, %edx ## encoding: [0x41,0x0f,0xb6,0xd0] +; X64-NEXT: vmovd %edx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2] +; X64-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ## encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x01] +; X64-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 ## encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x02] +; X64-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04] ; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] -; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X64-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06] ; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07] ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq ## encoding: [0xc3] @@ -6328,31 +6382,69 @@ declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { -; CHECK-LABEL: test_ucmp_q_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1] -; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc1,0x01] -; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x02] -; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd1,0x04] -; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xd9,0x05] -; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xe1,0x06] -; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; CHECK-NEXT: movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0] -; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] -; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: 
vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] -; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00] -; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] -; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; X86-LABEL: test_ucmp_q_512: +; X86: ## %bb.0: +; X86-NEXT: pushl %ebx ## encoding: [0x53] +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %edi ## encoding: [0x57] +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: pushl %esi ## encoding: [0x56] +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: .cfi_offset %esi, -16 +; X86-NEXT: .cfi_offset %edi, -12 +; X86-NEXT: .cfi_offset %ebx, -8 +; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1] +; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; X86-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc1,0x01] +; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] +; X86-NEXT: vpcmpleuq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc1,0x02] +; X86-NEXT: kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0] +; X86-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc1,0x04] +; X86-NEXT: kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0] +; X86-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc1,0x05] +; X86-NEXT: kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8] +; X86-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc1,0x06] +; X86-NEXT: kmovw %k0, %ebx ## encoding: [0xc5,0xf8,0x93,0xd8] +; X86-NEXT: movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0] +; X86-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X86-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] +; X86-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02] +; X86-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04] +; X86-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x05] +; X86-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc3,0x06] +; X86-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00] +; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X86-NEXT: popl %esi ## encoding: [0x5e] +; X86-NEXT: popl %edi ## encoding: [0x5f] +; X86-NEXT: popl %ebx ## encoding: [0x5b] +; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_ucmp_q_512: +; X64: ## %bb.0: +; X64-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1] +; X64-NEXT: kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0] +; X64-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc1,0x01] +; X64-NEXT: kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8] +; X64-NEXT: vpcmpleuq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc1,0x02] +; X64-NEXT: kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0] +; X64-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc1,0x04] +; 
X64-NEXT: kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0] +; X64-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc1,0x05] +; X64-NEXT: kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8] +; X64-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc1,0x06] +; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; X64-NEXT: movzbl %r8b, %ecx ## encoding: [0x41,0x0f,0xb6,0xc8] +; X64-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; X64-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ## encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x01] +; X64-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02] +; X64-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04] +; X64-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x05] +; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X64-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00] +; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1) @@ -6375,28 +6467,44 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { ; X86-LABEL: test_mask_ucmp_q_512: ; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: pushl %ebp ## encoding: [0x55] +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %ebx ## encoding: [0x53] +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: pushl %edi ## encoding: [0x57] +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: pushl %esi ## encoding: [0x56] +; X86-NEXT: .cfi_def_cfa_offset 20 +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x14] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1] ; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; X86-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x01] -; X86-NEXT: vpcmpleuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x02] -; X86-NEXT: vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04] -; X86-NEXT: vpcmpnltuq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe1,0x05] -; X86-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06] ; X86-NEXT: kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0] +; X86-NEXT: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x02] +; X86-NEXT: kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0] +; X86-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xc1,0x04] +; X86-NEXT: kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8] +; X86-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x05] +; X86-NEXT: kmovw %k0, %ebx ## encoding: [0xc5,0xf8,0x93,0xd8] +; X86-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ## encoding: 
[0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x06] +; X86-NEXT: kmovw %k0, %ebp ## encoding: [0xc5,0xf8,0x93,0xe8] ; X86-NEXT: movzbl %cl, %ecx ## encoding: [0x0f,0xb6,0xc9] ; X86-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; X86-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01] -; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca] -; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02] -; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] -; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04] -; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] -; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05] -; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9] -; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06] +; X86-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02] +; X86-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x04] +; X86-NEXT: vpinsrb $5, %ebx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc3,0x05] +; X86-NEXT: vpinsrb $6, %ebp, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc5,0x06] ; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X86-NEXT: popl %esi ## encoding: [0x5e] +; X86-NEXT: popl %edi ## encoding: [0x5f] +; X86-NEXT: popl %ebx ## encoding: [0x5b] +; X86-NEXT: popl %ebp ## encoding: [0x5d] ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl ## encoding: [0xc3] ; @@ -6404,24 +6512,24 @@ ; X64: ## %bb.0: ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1] -; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; X64-NEXT: kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0] ; X64-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x01] -; X64-NEXT: vpcmpleuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x02] -; X64-NEXT: vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04] -; X64-NEXT: vpcmpnltuq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe1,0x05] -; X64-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06] +; X64-NEXT: kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8] +; X64-NEXT: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x02] +; X64-NEXT: kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0] +; X64-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xc1,0x04] +; X64-NEXT: kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0] +; X64-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x05] +; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] +; X64-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x06] ; X64-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; X64-NEXT: movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0] -; X64-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] -; X64-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] -; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] -; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: 
[0xc4,0xe3,0x79,0x20,0xc0,0x02] -; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] +; X64-NEXT: movzbl %r8b, %edx ## encoding: [0x41,0x0f,0xb6,0xd0] +; X64-NEXT: vmovd %edx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2] +; X64-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ## encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x01] +; X64-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 ## encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x02] +; X64-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04] ; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] -; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] -; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X64-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06] ; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07] ; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq ## encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -9492,31 +9492,72 @@ declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) { -; CHECK-LABEL: test_cmp_d_256: -; CHECK: # %bb.0: -; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] -; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x66,0xc0] -; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x02] -; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x04] -; CHECK-NEXT: vpcmpnltd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x05] -; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k4 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xe1] -; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] -; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] -; CHECK-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] -; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] -; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] -; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] -; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00] -; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] -; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-LABEL: test_cmp_d_256: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx # encoding: [0x53] +; X86-NEXT: 
.cfi_def_cfa_offset 8 +; X86-NEXT: pushl %edi # encoding: [0x57] +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: pushl %esi # encoding: [0x56] +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: .cfi_offset %esi, -16 +; X86-NEXT: .cfi_offset %edi, -12 +; X86-NEXT: .cfi_offset %ebx, -8 +; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] +; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] +; X86-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x66,0xc0] +; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] +; X86-NEXT: vpcmpled %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x02] +; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] +; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x04] +; X86-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0] +; X86-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x05] +; X86-NEXT: kmovw %k0, %edi # encoding: [0xc5,0xf8,0x93,0xf8] +; X86-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1] +; X86-NEXT: kmovw %k0, %ebx # encoding: [0xc5,0xf8,0x93,0xd8] +; X86-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] +; X86-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X86-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] +; X86-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02] +; X86-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04] +; X86-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x05] +; X86-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc3,0x06] +; X86-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00] +; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X86-NEXT: popl %esi # encoding: [0x5e] +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: popl %edi # encoding: [0x5f] +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebx # encoding: [0x5b] +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_cmp_d_256: +; X64: # %bb.0: +; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] +; X64-NEXT: kmovw %k0, %r8d # encoding: [0xc5,0x78,0x93,0xc0] +; X64-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x66,0xc0] +; X64-NEXT: kmovw %k0, %r9d # encoding: [0xc5,0x78,0x93,0xc8] +; X64-NEXT: vpcmpled %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x02] +; X64-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] +; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x04] +; X64-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0] +; X64-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x05] +; X64-NEXT: kmovw %k0, %edi # encoding: [0xc5,0xf8,0x93,0xf8] +; X64-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1] +; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] +; X64-NEXT: movzbl %r8b, %ecx # encoding: [0x41,0x0f,0xb6,0xc8] +; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; X64-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x01] +; X64-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02] +; X64-NEXT: vpinsrb $4, 
%esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04] +; X64-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x05] +; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X64-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00] +; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1) %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1) @@ -9539,28 +9580,48 @@ define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) { ; X86-LABEL: test_mask_cmp_d_256: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: pushl %ebp # encoding: [0x55] +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %ebx # encoding: [0x53] +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: pushl %edi # encoding: [0x57] +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: pushl %esi # encoding: [0x56] +; X86-NEXT: .cfi_def_cfa_offset 20 +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1] ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] ; X86-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xc0] -; X86-NEXT: vpcmpled %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd1,0x02] -; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04] -; X86-NEXT: vpcmpnltd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x05] -; X86-NEXT: vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9] ; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] +; X86-NEXT: vpcmpled %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x02] +; X86-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0] +; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x04] +; X86-NEXT: kmovw %k0, %edi # encoding: [0xc5,0xf8,0x93,0xf8] +; X86-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x05] +; X86-NEXT: kmovw %k0, %ebx # encoding: [0xc5,0xf8,0x93,0xd8] +; X86-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1] +; X86-NEXT: kmovw %k0, %ebp # encoding: [0xc5,0xf8,0x93,0xe8] ; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] ; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; X86-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01] -; X86-NEXT: kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca] -; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02] -; X86-NEXT: kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb] -; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04] -; X86-NEXT: kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc] -; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05] -; X86-NEXT: kmovw 
%k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9] -; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06] +; X86-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02] +; X86-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x04] +; X86-NEXT: vpinsrb $5, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc3,0x05] +; X86-NEXT: vpinsrb $6, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc5,0x06] ; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X86-NEXT: popl %esi # encoding: [0x5e] +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: popl %edi # encoding: [0x5f] +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: popl %ebx # encoding: [0x5b] +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebp # encoding: [0x5d] +; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -9568,24 +9629,24 @@ ; X64: # %bb.0: ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1] -; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] +; X64-NEXT: kmovw %k0, %r8d # encoding: [0xc5,0x78,0x93,0xc0] ; X64-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xc0] -; X64-NEXT: vpcmpled %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd1,0x02] -; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04] -; X64-NEXT: vpcmpnltd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x05] -; X64-NEXT: vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9] +; X64-NEXT: kmovw %k0, %r9d # encoding: [0xc5,0x78,0x93,0xc8] +; X64-NEXT: vpcmpled %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x02] +; X64-NEXT: kmovw %k0, %r10d # encoding: [0xc5,0x78,0x93,0xd0] +; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x04] +; X64-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0] +; X64-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x05] +; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] +; X64-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1] ; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] -; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] -; X64-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] -; X64-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] -; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] -; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] -; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] +; X64-NEXT: movzbl %r8b, %edx # encoding: [0x41,0x0f,0xb6,0xd0] +; X64-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2] +; X64-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x01] +; X64-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x02] +; X64-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04] ; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] 
-; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] -; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X64-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06] ; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] @@ -9611,31 +9672,72 @@ declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) { -; CHECK-LABEL: test_ucmp_d_256: -; CHECK: # %bb.0: -; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] -; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x01] -; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x02] -; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x04] -; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd9,0x05] -; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x06] -; CHECK-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] -; CHECK-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] -; CHECK-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] -; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] -; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] -; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] -; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] -; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] -; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00] -; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] -; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; X86-LABEL: test_ucmp_d_256: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx # encoding: [0x53] +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %edi # encoding: [0x57] +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: pushl %esi # encoding: [0x56] +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: .cfi_offset %esi, -16 +; X86-NEXT: .cfi_offset %edi, -12 +; X86-NEXT: .cfi_offset %ebx, -8 +; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] +; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] +; X86-NEXT: vpcmpltud %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x01] +; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] +; X86-NEXT: vpcmpleud %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x02] +; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] +; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x04] +; X86-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0] +; X86-NEXT: vpcmpnltud %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x05] +; X86-NEXT: kmovw 
%k0, %edi # encoding: [0xc5,0xf8,0x93,0xf8] +; X86-NEXT: vpcmpnleud %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x06] +; X86-NEXT: kmovw %k0, %ebx # encoding: [0xc5,0xf8,0x93,0xd8] +; X86-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] +; X86-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X86-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01] +; X86-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02] +; X86-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04] +; X86-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x05] +; X86-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc3,0x06] +; X86-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00] +; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X86-NEXT: popl %esi # encoding: [0x5e] +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: popl %edi # encoding: [0x5f] +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebx # encoding: [0x5b] +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_ucmp_d_256: +; X64: # %bb.0: +; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1] +; X64-NEXT: kmovw %k0, %r8d # encoding: [0xc5,0x78,0x93,0xc0] +; X64-NEXT: vpcmpltud %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x01] +; X64-NEXT: kmovw %k0, %r9d # encoding: [0xc5,0x78,0x93,0xc8] +; X64-NEXT: vpcmpleud %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x02] +; X64-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0] +; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x04] +; X64-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0] +; X64-NEXT: vpcmpnltud %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x05] +; X64-NEXT: kmovw %k0, %edi # encoding: [0xc5,0xf8,0x93,0xf8] +; X64-NEXT: vpcmpnleud %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x06] +; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] +; X64-NEXT: movzbl %r8b, %ecx # encoding: [0x41,0x0f,0xb6,0xc8] +; X64-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; X64-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x01] +; X64-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02] +; X64-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04] +; X64-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x05] +; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X64-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00] +; X64-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1) %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1) @@ -9658,28 +9760,48 @@ define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) { ; X86-LABEL: test_mask_ucmp_d_256: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: 
pushl %ebp # encoding: [0x55]
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: pushl %ebx # encoding: [0x53]
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: pushl %edi # encoding: [0x57]
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: pushl %esi # encoding: [0x56]
+; X86-NEXT: .cfi_def_cfa_offset 20
+; X86-NEXT: .cfi_offset %esi, -20
+; X86-NEXT: .cfi_offset %edi, -16
+; X86-NEXT: .cfi_offset %ebx, -12
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x01]
-; X86-NEXT: vpcmpleud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x02]
-; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04]
-; X86-NEXT: vpcmpnltud %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe1,0x05]
-; X86-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
; X86-NEXT: kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT: vpcmpleud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x02]
+; X86-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0]
+; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x04]
+; X86-NEXT: kmovw %k0, %edi # encoding: [0xc5,0xf8,0x93,0xf8]
+; X86-NEXT: vpcmpnltud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x05]
+; X86-NEXT: kmovw %k0, %ebx # encoding: [0xc5,0xf8,0x93,0xd8]
+; X86-NEXT: vpcmpnleud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x06]
+; X86-NEXT: kmovw %k0, %ebp # encoding: [0xc5,0xf8,0x93,0xe8]
; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
; X86-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
-; X86-NEXT: kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca]
-; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
-; X86-NEXT: kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
-; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
-; X86-NEXT: kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
-; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
-; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
-; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
+; X86-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
+; X86-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x04]
+; X86-NEXT: vpinsrb $5, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc3,0x05]
+; X86-NEXT: vpinsrb $6, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc5,0x06]
; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
+; X86-NEXT: popl %esi # encoding: [0x5e]
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: popl %edi # encoding: [0x5f]
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: popl %ebx # encoding: [0x5b]
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: popl %ebp # encoding: [0x5d]
+; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -9687,24 +9809,24 @@
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
-; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
+; X64-NEXT: kmovw %k0, %r8d # encoding: [0xc5,0x78,0x93,0xc0]
; X64-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x01]
-; X64-NEXT: vpcmpleud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x02]
-; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04]
-; X64-NEXT: vpcmpnltud %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe1,0x05]
-; X64-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
+; X64-NEXT: kmovw %k0, %r9d # encoding: [0xc5,0x78,0x93,0xc8]
+; X64-NEXT: vpcmpleud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x02]
+; X64-NEXT: kmovw %k0, %r10d # encoding: [0xc5,0x78,0x93,0xd0]
+; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x04]
+; X64-NEXT: kmovw %k0, %esi # encoding: [0xc5,0xf8,0x93,0xf0]
+; X64-NEXT: vpcmpnltud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x05]
+; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
+; X64-NEXT: vpcmpnleud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x06]
; X64-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
-; X64-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
-; X64-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
-; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
-; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
-; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
+; X64-NEXT: movzbl %r8b, %edx # encoding: [0x41,0x0f,0xb6,0xd0]
+; X64-NEXT: vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
+; X64-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x01]
+; X64-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x02]
+; X64-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
-; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
-; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; X64-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
@@ -147,8 +147,7 @@
;
; AVX1-LABEL: bitcast_i8_8i1_freeze:
; AVX1: # %bb.0:
-; AVX1-NEXT: movzbl %dil, %eax
-; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
diff --git a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
--- a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
+++ b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
@@ -95,14 +95,6 @@
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
-;
-; AVX-LABEL: sub_op1_constant:
-; AVX: # %bb.0:
-; AVX-NEXT: movzbl (%rdi), %eax
-; AVX-NEXT: addb $-42, %al
-; AVX-NEXT: movzbl %al, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: retq
%x = load i8, ptr %p
%b = sub i8 %x, 42
%r = insertelement <16 x i8> undef, i8 %b, i32 0
@@ -237,14 +229,6 @@
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
-;
-; AVX-LABEL: shl_op1_constant:
-; AVX: # %bb.0:
-; AVX-NEXT: movzbl (%rdi), %eax
-; AVX-NEXT: shlb $5, %al
-; AVX-NEXT: movzbl %al, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: retq
%x = load i8, ptr %p
%b = shl i8 %x, 5
%r = insertelement <16 x i8> undef, i8 %b, i32 0
@@ -531,20 +515,6 @@
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
-;
-; AVX-LABEL: urem_op1_constant:
-; AVX: # %bb.0:
-; AVX-NEXT: movzbl (%rdi), %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrb %cl
-; AVX-NEXT: movzbl %cl, %ecx
-; AVX-NEXT: imull $49, %ecx, %ecx
-; AVX-NEXT: shrl $10, %ecx
-; AVX-NEXT: imull $42, %ecx, %ecx
-; AVX-NEXT: subb %cl, %al
-; AVX-NEXT: movzbl %al, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: retq
%x = load i8, ptr %p
%b = urem i8 %x, 42
%r = insertelement <16 x i8> undef, i8 %b, i32 0
diff --git a/llvm/test/CodeGen/X86/pr15267.ll b/llvm/test/CodeGen/X86/pr15267.ll
--- a/llvm/test/CodeGen/X86/pr15267.ll
+++ b/llvm/test/CodeGen/X86/pr15267.ll
@@ -31,19 +31,15 @@
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shrb %cl
; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: movl %eax, %edx
; CHECK-NEXT: andb $1, %dl
-; CHECK-NEXT: movzbl %dl, %edx
; CHECK-NEXT: vmovd %edx, %xmm0
; CHECK-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shrb $2, %cl
; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; CHECK-NEXT: shrb $3, %al
-; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; CHECK-NEXT: retq
%ret = load <4 x i1>, ptr %in, align 1
diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll
--- a/llvm/test/CodeGen/X86/setcc-lowering.ll
+++ b/llvm/test/CodeGen/X86/setcc-lowering.ll
@@ -44,7 +44,6 @@
; AVX-NEXT: .p2align 4, 0x90
; AVX-NEXT: .LBB1_1: # %for_loop599
; AVX-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: cmpq $65536, %rdi # imm = 0x10000
; AVX-NEXT: setl %al
; AVX-NEXT: vmovd %eax, %xmm2
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -3395,9 +3395,9 @@
;
; X86-AVX1-LABEL: test_mm_set_epi8:
; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
+; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
-; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40]
-; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -3533,10 +3533,10 @@
;
; X64-AVX1-LABEL: test_mm_set_epi8:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
-; X64-AVX1-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01]
+; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
+; X64-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
@@ -3553,18 +3553,12 @@
; X64-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
-; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X64-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X64-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
-; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X64-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X64-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
-; X64-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X64-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
-; X64-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X64-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X64-AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
+; X64-AVX1-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
+; X64-AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
+; X64-AVX1-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
+; X64-AVX1-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
+; X64-AVX1-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_set_epi8:
@@ -3671,10 +3665,10 @@
;
; X32-AVX1-LABEL: test_mm_set_epi8:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb6,0x54,0x24,0x48]
; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
-; X32-AVX1-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01]
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
+; X32-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
; X32-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38]
@@ -3691,18 +3685,12 @@
; X32-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
; X32-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
-; X32-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X32-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; X32-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X32-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
-; X32-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X32-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X32-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X32-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
-; X32-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X32-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
-; X32-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X32-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X32-AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
+; X32-AVX1-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
+; X32-AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
+; X32-AVX1-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
+; X32-AVX1-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
+; X32-AVX1-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
; X32-AVX1-NEXT: retq # encoding: [0xc3]
;
; X32-AVX512-LABEL: test_mm_set_epi8:
@@ -4397,8 +4385,7 @@
;
; X64-AVX1-LABEL: test_mm_set1_epi8:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
+; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; X64-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
@@ -4422,8 +4409,7 @@
;
; X32-AVX1-LABEL: test_mm_set1_epi8:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
+; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
; X32-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
; X32-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
; X32-AVX1-NEXT: retq # encoding: [0xc3]
@@ -4812,9 +4798,9 @@
;
; X86-AVX1-LABEL: test_mm_setr_epi8:
; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
-; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -4950,18 +4936,12 @@
;
; X64-AVX1-LABEL: test_mm_setr_epi8:
; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X64-AVX1-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
-; X64-AVX1-NEXT: vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6]
-; X64-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X64-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
-; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X64-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X64-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
+; X64-AVX1-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
+; X64-AVX1-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
+; X64-AVX1-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
+; X64-AVX1-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
+; X64-AVX1-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X64-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
@@ -5088,18 +5068,12 @@
;
; X32-AVX1-LABEL: test_mm_setr_epi8:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X32-AVX1-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
-; X32-AVX1-NEXT: vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6]
-; X32-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X32-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X32-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X32-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X32-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
-; X32-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X32-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X32-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X32-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
+; X32-AVX1-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
+; X32-AVX1-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
+; X32-AVX1-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
+; X32-AVX1-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
+; X32-AVX1-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
; X32-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
@@ -571,12 +571,6 @@
; X64-SSE-NEXT: movzbl %dil, %eax
; X64-SSE-NEXT: pinsrb $1, %eax, %xmm0
; X64-SSE-NEXT: retq
-;
-; X64-AVX-LABEL: test_mm_insert_epi8:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: movzbl %dil, %eax
-; X64-AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; X64-AVX-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%res = insertelement <16 x i8> %arg0, i8 %a1,i32 1
%bc = bitcast <16 x i8> %res to <2 x i64>
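
The common thread in the test updates above: VPINSRB only reads the low byte of its 32-bit GPR source operand, and for the vmovd cases the upper bits of the inserted scalar are don't-care (anyext) bits, so the new patterns let the byte value be passed in its containing 32-bit register and the explicit movzbl zero-extensions disappear. A minimal standalone reproducer of the same effect (the function name insert_byte is illustrative, not taken from the test suite):

  define <16 x i8> @insert_byte(<16 x i8> %v, i8 %b) {
    %r = insertelement <16 x i8> %v, i8 %b, i32 1
    ret <16 x i8> %r
  }

Compiled with llc -mtriple=x86_64 -mattr=+avx, this previously emitted movzbl %dil, %eax followed by vpinsrb $1, %eax, %xmm0, %xmm0 (the exact sequence removed from test_mm_insert_epi8 above); with the new patterns it should emit vpinsrb $1, %edi, %xmm0, %xmm0 directly, since bits 31:8 of %edi are never read.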