diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4736,6 +4736,9 @@
 }
 let Predicates = [HasAVX512] in {
+  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
+            (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+                                               GR8:$src, sub_8bit)))>;
   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
             (VMOVDI2PDIZrr GR32:$src)>;
@@ -11714,6 +11717,25 @@
 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
+let Predicates = [HasAVX512, NoBWI] in {
+  def : Pat<(X86pinsrb VR128:$src1,
+                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
+                       timm:$src3),
+            (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
+                       timm:$src3)>;
+}
+
+let Predicates = [HasBWI] in {
+  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
+            (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+                                                    GR8:$src2, sub_8bit), timm:$src3)>;
+  def : Pat<(X86pinsrb VR128:$src1,
+                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
+                       timm:$src3),
+            (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
+                        timm:$src3)>;
+}
+
 // Always select FP16 instructions if available.
 let Predicates = [HasBWI], AddedComplexity = -10 in {
   def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4244,6 +4244,9 @@
 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
 let Predicates = [UseAVX] in {
+  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
+            (VMOVDI2PDIrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+                                              GR8:$src, sub_8bit)))>;
   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
             (VMOVDI2PDIrr GR32:$src)>;
@@ -5353,8 +5356,13 @@
                    Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
 }
-let Predicates = [HasAVX, NoBWI] in
+let Predicates = [HasAVX, NoBWI] in {
   defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG;
+  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
+            (VPINSRBrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+                                                   GR8:$src2, sub_8bit), timm:$src3)>;
+}
+
 let Constraints = "$src1 = $dst" in
   defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
--- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -715,8 +715,7 @@
 ;
 ; X64-LABEL: test_mm256_insert_epi8:
 ; X64: # %bb.0:
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm1
+; X64-NEXT: vpinsrb $4, %edi, %xmm0, %xmm1
 ; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; X64-NEXT: retq
   %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
@@ -1418,8 +1417,8 @@
 ; X86-LABEL: test_mm256_set_epi8:
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: vmovd %ecx, %xmm0
+; X86-NEXT: vmovd %eax, %xmm0
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
@@ -1450,8 +1449,8 @@
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: vmovd %ecx, %xmm1
+; X86-NEXT: vmovd %eax, %xmm1
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
@@ -1487,8 +1486,8 @@
 ; X64-LABEL: test_mm256_set_epi8:
 ; X64: # %bb.0:
 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
-; X64-NEXT: vmovd %r10d, %xmm0
+; X64-NEXT: vmovd %eax, %xmm0
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
@@ -1506,21 +1505,15 @@
 ; X64-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %r9b, %eax
-; X64-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %r8b, %eax
-; X64-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %cl, %eax
-; X64-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %dl, %eax
-; X64-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %sil, %eax
-; X64-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; X64-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0
+; X64-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0
+; X64-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
+; X64-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
+; X64-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0
+; X64-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: vmovd %eax, %xmm1
 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT: vmovd %ecx, %xmm1
 ; X64-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
@@ -1859,8 +1852,7 @@
 ;
 ; X64-LABEL: test_mm256_set1_epi8:
 ; X64: # %bb.0:
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: vmovd %eax, %xmm0
+; X64-NEXT: vmovd %edi, %xmm0
 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vpshufb %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
@@ -2034,8 +2026,8 @@
 ; X86-LABEL: test_mm256_setr_epi8:
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: vmovd %ecx, %xmm0
+; X86-NEXT: vmovd %eax, %xmm0
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
@@ -2066,8 +2058,8 @@
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: vmovd %ecx, %xmm1
+; X86-NEXT: vmovd %eax, %xmm1
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
@@ -2103,8 +2095,8 @@
 ; X64-LABEL: test_mm256_setr_epi8:
 ; X64: # %bb.0:
 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
-; X64-NEXT: vmovd %r10d, %xmm0
+; X64-NEXT: vmovd %eax, %xmm0
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
@@ -2134,18 +2126,12 @@
 ; X64-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; X64-NEXT: movzbl %sil, %eax
-; X64-NEXT: movzbl %dil, %esi
-; X64-NEXT: vmovd %esi, %xmm1
-; X64-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; X64-NEXT: movzbl %dl, %eax
-; X64-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; X64-NEXT: movzbl %cl, %eax
-; X64-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; X64-NEXT: movzbl %r8b, %eax
-; X64-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; X64-NEXT: movzbl %r9b, %eax
-; X64-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; X64-NEXT: vmovd %edi, %xmm1
+; X64-NEXT: vpinsrb $1, %esi, %xmm1, %xmm1
+; X64-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1
+; X64-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
+; X64-NEXT: vpinsrb $4, %r8d, %xmm1, %xmm1
+; X64-NEXT: vpinsrb $5, %r9d, %xmm1, %xmm1
 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
diff --git a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
--- a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
+++ b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
@@ -100,7 +100,6 @@
 ; AVX: # %bb.0:
 ; AVX-NEXT: movzbl (%rdi), %eax
 ; AVX-NEXT: addb $-42, %al
-; AVX-NEXT: movzbl %al, %eax
 ; AVX-NEXT: vmovd %eax, %xmm0
 ; AVX-NEXT: retq
   %x = load i8, ptr %p
@@ -242,7 +241,6 @@
 ; AVX: # %bb.0:
 ; AVX-NEXT: movzbl (%rdi), %eax
 ; AVX-NEXT: shlb $5, %al
-; AVX-NEXT: movzbl %al, %eax
 ; AVX-NEXT: vmovd %eax, %xmm0
 ; AVX-NEXT: retq
   %x = load i8, ptr %p
@@ -542,7 +540,6 @@
 ; AVX-NEXT: shrl $10, %ecx
 ; AVX-NEXT: imull $42, %ecx, %ecx
 ; AVX-NEXT: subb %cl, %al
-; AVX-NEXT: movzbl %al, %eax
 ; AVX-NEXT: vmovd %eax, %xmm0
 ; AVX-NEXT: retq
   %x = load i8, ptr %p
diff --git a/llvm/test/CodeGen/X86/pr15267.ll b/llvm/test/CodeGen/X86/pr15267.ll
--- a/llvm/test/CodeGen/X86/pr15267.ll
+++ b/llvm/test/CodeGen/X86/pr15267.ll
@@ -31,19 +31,15 @@
 ; CHECK-NEXT: movl %eax, %ecx
 ; CHECK-NEXT: shrb %cl
 ; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movzbl %cl, %ecx
 ; CHECK-NEXT: movl %eax, %edx
 ; CHECK-NEXT: andb $1, %dl
-; CHECK-NEXT: movzbl %dl, %edx
 ; CHECK-NEXT: vmovd %edx, %xmm0
 ; CHECK-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
 ; CHECK-NEXT: movl %eax, %ecx
 ; CHECK-NEXT: shrb $2, %cl
 ; CHECK-NEXT: andb $1, %cl
-; CHECK-NEXT: movzbl %cl, %ecx
 ; CHECK-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
 ; CHECK-NEXT: shrb $3, %al
-; CHECK-NEXT: movzbl %al, %eax
 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %ret = load <4 x i1>, ptr %in, align 1
diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll
--- a/llvm/test/CodeGen/X86/setcc-lowering.ll
+++ b/llvm/test/CodeGen/X86/setcc-lowering.ll
@@ -44,7 +44,6 @@
 ; AVX-NEXT: .p2align 4, 0x90
 ; AVX-NEXT: .LBB1_1: # %for_loop599
 ; AVX-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX-NEXT: xorl %eax, %eax
 ; AVX-NEXT: cmpq $65536, %rdi # imm = 0x10000
 ; AVX-NEXT: setl %al
 ; AVX-NEXT: vmovd %eax, %xmm2
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -3395,9 +3395,9 @@
 ;
 ; X86-AVX1-LABEL: test_mm_set_epi8:
 ; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
+; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
-; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40]
-; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
 ; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -3431,9 +3431,9 @@
 ;
 ; X86-AVX512-LABEL: test_mm_set_epi8:
 ; X86-AVX512: # %bb.0:
+; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
+; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c]
-; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40]
-; X86-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38]
 ; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -3533,9 +3533,9 @@
 ;
 ; X64-AVX1-LABEL: test_mm_set_epi8:
 ; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
+; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
-; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x50]
-; X64-AVX1-NEXT: vmovd %r10d, %xmm0 # encoding: [0xc4,0xc1,0x79,0x6e,0xc2]
 ; X64-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
 ; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -3553,25 +3553,19 @@
 ; X64-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
 ; X64-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
-; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X64-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X64-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
-; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X64-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X64-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
-; X64-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X64-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
-; X64-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X64-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X64-AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
+; X64-AVX1-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
+; X64-AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
+; X64-AVX1-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
+; X64-AVX1-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
+; X64-AVX1-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX512-LABEL: test_mm_set_epi8:
 ; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50]
+; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48]
-; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x50]
-; X64-AVX512-NEXT: vmovd %r10d, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc2]
 ; X64-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40]
 ; X64-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -3589,18 +3583,12 @@
 ; X64-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
 ; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
-; X64-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; X64-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
-; X64-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X64-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X64-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
-; X64-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
-; X64-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X64-AVX512-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
+; X64-AVX512-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
+; X64-AVX512-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
+; X64-AVX512-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
+; X64-AVX512-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
+; X64-AVX512-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
 ;
 ; X32-SSE-LABEL: test_mm_set_epi8:
@@ -3671,9 +3659,9 @@
 ;
 ; X32-AVX1-LABEL: test_mm_set_epi8:
 ; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
+; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
-; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb6,0x54,0x24,0x50]
-; X32-AVX1-NEXT: vmovd %r10d, %xmm0 # encoding: [0xc4,0xc1,0x79,0x6e,0xc2]
 ; X32-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
 ; X32-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -3691,25 +3679,19 @@
 ; X32-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
 ; X32-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
-; X32-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X32-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; X32-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X32-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
-; X32-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X32-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X32-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X32-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
-; X32-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X32-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
-; X32-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X32-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X32-AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
+; X32-AVX1-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
+; X32-AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
+; X32-AVX1-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
+; X32-AVX1-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
+; X32-AVX1-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
 ;
 ; X32-AVX512-LABEL: test_mm_set_epi8:
 ; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50]
+; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48]
-; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb6,0x54,0x24,0x50]
-; X32-AVX512-NEXT: vmovd %r10d, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc2]
 ; X32-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40]
 ; X32-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -3727,18 +3709,12 @@
 ; X32-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
 ; X32-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09]
-; X32-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X32-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; X32-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X32-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b]
-; X32-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X32-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X32-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X32-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d]
-; X32-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X32-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
-; X32-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X32-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f]
+; X32-AVX512-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x0a]
+; X32-AVX512-NEXT: vpinsrb $11, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x0b]
+; X32-AVX512-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x0c]
+; X32-AVX512-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x0d]
+; X32-AVX512-NEXT: vpinsrb $14, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x0e]
+; X32-AVX512-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0f]
 ; X32-AVX512-NEXT: retq # encoding: [0xc3]
   %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
   %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
@@ -4397,8 +4373,7 @@
 ;
 ; X64-AVX1-LABEL: test_mm_set1_epi8:
 ; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
+; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
 ; X64-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
 ; X64-AVX1-NEXT: retq # encoding: [0xc3]
@@ -4422,8 +4397,7 @@
 ;
 ; X32-AVX1-LABEL: test_mm_set1_epi8:
 ; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7]
-; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
+; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
 ; X32-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
 ; X32-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1]
 ; X32-AVX1-NEXT: retq # encoding: [0xc3]
@@ -4812,9 +4786,9 @@
 ;
 ; X86-AVX1-LABEL: test_mm_setr_epi8:
 ; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0]
 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
-; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
 ; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -4848,9 +4822,9 @@
 ;
 ; X86-AVX512-LABEL: test_mm_setr_epi8:
 ; X86-AVX512: # %bb.0:
+; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
-; X86-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
 ; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -4950,18 +4924,12 @@
 ;
 ; X64-AVX1-LABEL: test_mm_setr_epi8:
 ; X64-AVX1: # %bb.0:
-; X64-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X64-AVX1-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
-; X64-AVX1-NEXT: vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6]
-; X64-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X64-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
-; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X64-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X64-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
+; X64-AVX1-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
+; X64-AVX1-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
+; X64-AVX1-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
+; X64-AVX1-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
+; X64-AVX1-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
 ; X64-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
 ; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
@@ -4986,18 +4954,12 @@
 ;
 ; X64-AVX512-LABEL: test_mm_setr_epi8:
 ; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X64-AVX512-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
-; X64-AVX512-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
-; X64-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X64-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X64-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X64-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X64-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
-; X64-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X64-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X64-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
+; X64-AVX512-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
+; X64-AVX512-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
+; X64-AVX512-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
+; X64-AVX512-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
+; X64-AVX512-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
 ; X64-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
 ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10]
@@ -5088,18 +5050,12 @@
 ;
 ; X32-AVX1-LABEL: test_mm_setr_epi8:
 ; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X32-AVX1-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
-; X32-AVX1-NEXT: vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6]
-; X32-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X32-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X32-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X32-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X32-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
-; X32-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X32-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X32-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X32-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7]
+; X32-AVX1-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
+; X32-AVX1-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
+; X32-AVX1-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
+; X32-AVX1-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
+; X32-AVX1-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
 ; X32-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
 ; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
@@ -5124,18 +5080,12 @@
 ;
 ; X32-AVX512-LABEL: test_mm_setr_epi8:
 ; X32-AVX512: # %bb.0:
-; X32-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6]
-; X32-AVX512-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7]
-; X32-AVX512-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
-; X32-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X32-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2]
-; X32-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X32-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
-; X32-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03]
-; X32-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0]
-; X32-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X32-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1]
-; X32-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
+; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
+; X32-AVX512-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x01]
+; X32-AVX512-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x02]
+; X32-AVX512-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x03]
+; X32-AVX512-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc0,0x04]
+; X32-AVX512-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc1,0x05]
 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08]
 ; X32-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
 ; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10]
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
@@ -574,8 +574,7 @@
 ;
 ; X64-AVX-LABEL: test_mm_insert_epi8:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: movzbl %dil, %eax
-; X64-AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; X64-AVX-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0
 ; X64-AVX-NEXT: retq
   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
   %res = insertelement <16 x i8> %arg0, i8 %a1,i32 1
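
Note (not part of the patch): every test delta above is the same transformation. The new patterns let ISel consume an (i32 (anyext i8)) operand directly, reaching the byte register through INSERT_SUBREG or COPY_TO_REGCLASS instead of materializing a movzbl first. This appears safe because vpinsrb only reads the low byte of its GPR source, and in the vmovd pattern the anyext leaves the upper bits undefined anyway. A minimal standalone reproducer, distilled from the updated test_mm_insert_epi8 check lines (the function name and RUN line are illustrative, not taken from the patch):

; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s
; CHECK-LABEL: insert_byte:
; CHECK-NOT: movzbl
; CHECK: vpinsrb $1, %edi, %xmm0, %xmm0
define <16 x i8> @insert_byte(<16 x i8> %v, i8 %b) {
  %r = insertelement <16 x i8> %v, i8 %b, i32 1
  ret <16 x i8> %r
}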