diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
--- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -116,6 +116,8 @@
   ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
   ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
   ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
+  ENUM_ENTRY(IC_64BIT_VEX_OPSIZE, 4, "requires 64-bit mode and VEX") \
+  ENUM_ENTRY(IC_64BIT_VEX_OPSIZE_ADSIZE, 5, "requires 64-bit mode, VEX, and AdSize")\
   ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
   ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
   ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \
diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -1119,6 +1119,8 @@
     switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
     case VEX_PREFIX_66:
       attrMask |= ATTR_OPSIZE;
+      if (insn->hasAdSize)
+        attrMask |= ATTR_ADSIZE;
       break;
     case VEX_PREFIX_F3:
       attrMask |= ATTR_XS;
@@ -1175,6 +1177,8 @@
     case 0x66:
       if (insn->mode != MODE_16BIT)
         attrMask |= ATTR_OPSIZE;
+      if (insn->hasAdSize)
+        attrMask |= ATTR_ADSIZE;
       break;
     case 0x67:
       attrMask |= ATTR_ADSIZE;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4011,7 +4011,15 @@
                     (ins VR128:$src, VR128:$mask),
                     "maskmovdqu\t{$mask, $src|$src, $mask}",
                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
-                    VEX, VEX_WIG;
+                    VEX, VEX_WIG, AdSize64;
+let Uses = [EDI], Predicates = [HasAVX,In64BitMode] in
+def VMASKMOVDQUX32 : VPDI<0xF7, MRMSrcReg, (outs),
+                    (ins VR128:$src, VR128:$mask), "",
+                    [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
+                    VEX, VEX_WIG, AdSize32 {
+  let AsmString = "addr32 vmaskmovdqu\t{$mask, $src|$src, $mask}";
+  let AsmVariantName = "NonParsable";
+}
 let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
 def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
@@ -4020,7 +4028,15 @@
 let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
 def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
                      "maskmovdqu\t{$mask, $src|$src, $mask}",
-                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
+                     AdSize64;
+let Uses = [EDI], Predicates = [UseSSE2,In64BitMode] in
+def MASKMOVDQUX32 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+                     "addr32 maskmovdqu\t{$mask, $src|$src, $mask}",
+                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
+                     AdSize32 {
+  let AsmVariantName = "NonParsable";
+}

 } // ExeDomain = SSEPackedInt
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -835,8 +835,8 @@
   let ResourceCycles = [1, 1, 2, 2, 2, 16, 42];
   let NumMicroOps = 63;
 }
-def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64,
-                                         VMASKMOVDQU, VMASKMOVDQU64)>;
+def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64, MASKMOVDQUX32,
+                                         VMASKMOVDQU, VMASKMOVDQU64, VMASKMOVDQUX32)>;

 ///////////////////////////////////////////////////////////////////////////////
 // SchedWriteVariant definitions.
diff --git a/llvm/test/CodeGen/X86/maskmovdqu.ll b/llvm/test/CodeGen/X86/maskmovdqu.ll
--- a/llvm/test/CodeGen/X86/maskmovdqu.ll
+++ b/llvm/test/CodeGen/X86/maskmovdqu.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-- -mattr=+sse2,-avx | FileCheck %s --check-prefix=i686_SSE2
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2,-avx | FileCheck %s --check-prefix=x86_64_SSE2
+; RUN: llc < %s -mtriple=x86_64--gnux32 -mattr=+sse2,-avx | FileCheck %s --check-prefix=x86_x32_SSE2
 ; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefix=i686_AVX
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=x86_64_AVX
+; RUN: llc < %s -mtriple=x86_64--gnux32 -mattr=+avx | FileCheck %s --check-prefix=x86_x32_AVX
 ; rdar://6573467

 define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
@@ -20,6 +22,13 @@
 ; x86_64_SSE2-NEXT: maskmovdqu %xmm1, %xmm0
 ; x86_64_SSE2-NEXT: retq
 ;
+; x86_x32_SSE2-LABEL: test:
+; x86_x32_SSE2: # %bb.0: # %entry
+; x86_x32_SSE2-NEXT: movq %rsi, %rdi
+; x86_x32_SSE2-NEXT: # kill: def $edi killed $edi killed $rdi
+; x86_x32_SSE2-NEXT: addr32 maskmovdqu %xmm1, %xmm0
+; x86_x32_SSE2-NEXT: retq
+;
 ; i686_AVX-LABEL: test:
 ; i686_AVX: # %bb.0: # %entry
 ; i686_AVX-NEXT: pushl %edi
@@ -33,6 +42,12 @@
 ; x86_64_AVX-NEXT: movq %rsi, %rdi
 ; x86_64_AVX-NEXT: vmaskmovdqu %xmm1, %xmm0
 ; x86_64_AVX-NEXT: retq
+; x86_x32_AVX-LABEL: test:
+; x86_x32_AVX: # %bb.0: # %entry
+; x86_x32_AVX-NEXT: movq %rsi, %rdi
+; x86_x32_AVX-NEXT: # kill: def $edi killed $edi killed $rdi
+; x86_x32_AVX-NEXT: addr32 vmaskmovdqu %xmm1, %xmm0
+; x86_x32_AVX-NEXT: retq
 entry:
   tail call void @llvm.x86.sse2.maskmov.dqu( <16 x i8> %a, <16 x i8> %b, i8* %c )
   ret void
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -5,6 +5,9 @@
 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown-gnux32 -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X32,SSE,X32-SSE
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown-gnux32 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X32,AVX,X32-AVX,AVX1,X32-AVX1
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown-gnux32 -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X32,AVX,X32-AVX,AVX512,X32-AVX512

 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
@@ -473,6 +476,11 @@
 ; X64: # %bb.0:
 ; X64-NEXT: clflush (%rdi) # encoding: [0x0f,0xae,0x3f]
 ; X64-NEXT: retq # encoding: [0xc3]
+;
+; X32-LABEL: test_mm_clflush:
+; X32: # %bb.0:
+; X32-NEXT: clflush (%edi) # encoding: [0x67,0x0f,0xae,0x3f]
+; X32-NEXT: retq # encoding: [0xc3]
   call void @llvm.x86.sse2.clflush(i8* %a0)
   ret void
 }
@@ -1497,6 +1505,10 @@
 ; X64-LABEL:
test_mm_cvtsd_f64: ; X64: # %bb.0: ; X64-NEXT: retq # encoding: [0xc3] +; +; X32-LABEL: test_mm_cvtsd_f64: +; X32: # %bb.0: +; X32-NEXT: retq # encoding: [0xc3] %res = extractelement <2 x double> %a0, i32 0 ret double %res } @@ -1574,6 +1586,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_cvtsd_ss_load: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: cvtsd2ss (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x5a,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_cvtsd_ss_load: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vcvtsd2ss (%edi), %xmm0, %xmm0 # encoding: [0x67,0xc5,0xfb,0x5a,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_cvtsd_ss_load: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vcvtsd2ss (%edi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x5a,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %a1 = load <2 x double>, <2 x double>* %p1 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ret <4 x float> %res @@ -1629,6 +1656,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_cvtsi32_sd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: cvtsi2sd %edi, %xmm0 # encoding: [0xf2,0x0f,0x2a,0xc7] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_cvtsi32_sd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0xc7] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_cvtsi32_sd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %cvt = sitofp i32 %a1 to double %res = insertelement <2 x double> %a0, double %cvt, i32 0 ret <2 x double> %res @@ -1667,6 +1709,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_cvtsi32_si128: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_cvtsi32_si128: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_cvtsi32_si128: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1 %res2 = insertelement <4 x i32> %res1, i32 0, i32 2 @@ -1856,6 +1913,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_insert_epi16: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: pinsrw $1, %edi, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc7,0x01] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_insert_epi16: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x01] +; X32-AVX1-NEXT: retq # encoding: 
[0xc3] +; +; X32-AVX512-LABEL: test_mm_insert_epi16: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1 %bc = bitcast <8 x i16> %res to <2 x i64> @@ -1905,6 +1977,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_load_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movaps (%edi), %xmm0 # encoding: [0x67,0x0f,0x28,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_load_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovaps (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x28,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_load_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovaps (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x28,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* %res = load <2 x double>, <2 x double>* %arg0, align 16 ret <2 x double> %res @@ -1949,6 +2036,24 @@ ; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] ; X64-AVX512-NEXT: # xmm0 = mem[0],zero ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_load_sd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movsd (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x10,0x07] +; X32-SSE-NEXT: # xmm0 = mem[0],zero +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_load_sd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovsd (%edi), %xmm0 # encoding: [0x67,0xc5,0xfb,0x10,0x07] +; X32-AVX1-NEXT: # xmm0 = mem[0],zero +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_load_sd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovsd (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x10,0x07] +; X32-AVX512-NEXT: # xmm0 = mem[0],zero +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, double* %a0, align 1 %res0 = insertelement <2 x double> undef, double %ld, i32 0 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1 @@ -1988,6 +2093,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_load_si128: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movaps (%edi), %xmm0 # encoding: [0x67,0x0f,0x28,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_load_si128: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovaps (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x28,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_load_si128: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovaps (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x28,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res = load <2 x i64>, <2 x i64>* %a0, align 16 ret <2 x i64> %res } @@ -2035,6 +2155,26 @@ ; X64-AVX512-NEXT: vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07] ; X64-AVX512-NEXT: # xmm0 = mem[0,0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_load1_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movsd (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x10,0x07] +; X32-SSE-NEXT: # xmm0 = mem[0],zero +; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # 
encoding: [0x0f,0x16,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_load1_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovddup (%edi), %xmm0 # encoding: [0x67,0xc5,0xfb,0x12,0x07] +; X32-AVX1-NEXT: # xmm0 = mem[0,0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_load1_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovddup (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x12,0x07] +; X32-AVX512-NEXT: # xmm0 = mem[0,0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, double* %a0, align 8 %res0 = insertelement <2 x double> undef, double %ld, i32 0 %res1 = insertelement <2 x double> %res0, double %ld, i32 1 @@ -2080,6 +2220,24 @@ ; X64-AVX512-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x07] ; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadh_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movhps (%edi), %xmm0 # encoding: [0x67,0x0f,0x16,0x07] +; X32-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadh_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovhps (%edi), %xmm0, %xmm0 # encoding: [0x67,0xc5,0xf8,0x16,0x07] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadh_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovhps (%edi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x16,0x07] +; X32-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, double* %a1, align 8 %res = insertelement <2 x double> %a0, double %ld, i32 1 ret <2 x double> %res @@ -2124,6 +2282,24 @@ ; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] ; X64-AVX512-NEXT: # xmm0 = mem[0],zero ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadl_epi64: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movsd (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x10,0x07] +; X32-SSE-NEXT: # xmm0 = mem[0],zero +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadl_epi64: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovsd (%edi), %xmm0 # encoding: [0x67,0xc5,0xfb,0x10,0x07] +; X32-AVX1-NEXT: # xmm0 = mem[0],zero +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadl_epi64: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovsd (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x10,0x07] +; X32-AVX512-NEXT: # xmm0 = mem[0],zero +; X32-AVX512-NEXT: retq # encoding: [0xc3] %bc = bitcast <2 x i64>* %a1 to i64* %ld = load i64, i64* %bc, align 1 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0 @@ -2170,6 +2346,24 @@ ; X64-AVX512-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x07] ; X64-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadl_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlps (%edi), %xmm0 # encoding: [0x67,0x0f,0x12,0x07] +; X32-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadl_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovlps (%edi), %xmm0, %xmm0 # encoding: [0x67,0xc5,0xf8,0x12,0x07] +; X32-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadl_pd: +; 
X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovlps (%edi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x12,0x07] +; X32-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, double* %a1, align 8 %res = insertelement <2 x double> %a0, double %ld, i32 0 ret <2 x double> %res @@ -2216,6 +2410,25 @@ ; X64-AVX512-NEXT: vpermilpd $1, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01] ; X64-AVX512-NEXT: # xmm0 = mem[1,0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadr_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movaps (%edi), %xmm0 # encoding: [0x67,0x0f,0x28,0x07] +; X32-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e] +; X32-SSE-NEXT: # xmm0 = xmm0[2,3,0,1] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadr_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vpermilpd $1, (%edi), %xmm0 # encoding: [0x67,0xc4,0xe3,0x79,0x05,0x07,0x01] +; X32-AVX1-NEXT: # xmm0 = mem[1,0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadr_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpermilpd $1, (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc4,0xe3,0x79,0x05,0x07,0x01] +; X32-AVX512-NEXT: # xmm0 = mem[1,0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* %ld = load <2 x double>, <2 x double>* %arg0, align 16 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> @@ -2255,6 +2468,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadu_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movups (%edi), %xmm0 # encoding: [0x67,0x0f,0x10,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadu_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovups (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x10,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadu_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovups (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x10,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* %res = load <2 x double>, <2 x double>* %arg0, align 1 ret <2 x double> %res @@ -2293,6 +2521,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadu_si128: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movups (%edi), %xmm0 # encoding: [0x67,0x0f,0x10,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadu_si128: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovups (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x10,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadu_si128: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovups (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x10,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res = load <2 x i64>, <2 x i64>* %a0, align 1 ret <2 x i64> %res } @@ -2336,6 +2579,24 @@ ; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] ; X64-AVX512-NEXT: # xmm0 = mem[0],zero ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadu_si64: +; X32-SSE: # %bb.0: # %entry +; X32-SSE-NEXT: movsd (%edi), %xmm0 # 
encoding: [0x67,0xf2,0x0f,0x10,0x07] +; X32-SSE-NEXT: # xmm0 = mem[0],zero +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadu_si64: +; X32-AVX1: # %bb.0: # %entry +; X32-AVX1-NEXT: vmovsd (%edi), %xmm0 # encoding: [0x67,0xc5,0xfb,0x10,0x07] +; X32-AVX1-NEXT: # xmm0 = mem[0],zero +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadu_si64: +; X32-AVX512: # %bb.0: # %entry +; X32-AVX512-NEXT: vmovsd (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x10,0x07] +; X32-AVX512-NEXT: # xmm0 = mem[0],zero +; X32-AVX512-NEXT: retq # encoding: [0xc3] entry: %__v.i = bitcast i8* %A to i64* %0 = load i64, i64* %__v.i, align 1 @@ -2382,6 +2643,24 @@ ; X64-AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] ; X64-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadu_si32: +; X32-SSE: # %bb.0: # %entry +; X32-SSE-NEXT: movss (%edi), %xmm0 # encoding: [0x67,0xf3,0x0f,0x10,0x07] +; X32-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadu_si32: +; X32-AVX1: # %bb.0: # %entry +; X32-AVX1-NEXT: vmovss (%edi), %xmm0 # encoding: [0x67,0xc5,0xfa,0x10,0x07] +; X32-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadu_si32: +; X32-AVX512: # %bb.0: # %entry +; X32-AVX512-NEXT: vmovss (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfa,0x10,0x07] +; X32-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero +; X32-AVX512-NEXT: retq # encoding: [0xc3] entry: %__v.i = bitcast i8* %A to i32* %0 = load i32, i32* %__v.i, align 1 @@ -2429,6 +2708,24 @@ ; X64-AVX512-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] ; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadu_si16: +; X32-SSE: # %bb.0: # %entry +; X32-SSE-NEXT: movzwl (%edi), %eax # encoding: [0x67,0x0f,0xb7,0x07] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadu_si16: +; X32-AVX1: # %bb.0: # %entry +; X32-AVX1-NEXT: movzwl (%edi), %eax # encoding: [0x67,0x0f,0xb7,0x07] +; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadu_si16: +; X32-AVX512: # %bb.0: # %entry +; X32-AVX512-NEXT: movzwl (%edi), %eax # encoding: [0x67,0x0f,0xb7,0x07] +; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] entry: %__v.i = bitcast i8* %A to i16* %0 = load i16, i16* %__v.i, align 1 @@ -2486,6 +2783,18 @@ ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1] ; X64-AVX-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_maskmoveu_si128: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: # kill: def $edi killed $edi killed $rdi +; X32-SSE-NEXT: addr32 maskmovdqu %xmm1, %xmm0 # encoding: [0x67,0x66,0x0f,0xf7,0xc1] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX-LABEL: test_mm_maskmoveu_si128: +; X32-AVX: # %bb.0: +; X32-AVX-NEXT: # kill: def $edi killed $edi killed $rdi +; X32-AVX-NEXT: addr32 vmaskmovdqu %xmm1, %xmm0 # encoding: [0x67,0xc5,0xf9,0xf7,0xc1] +; X32-AVX-NEXT: retq # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = 
bitcast <2 x i64> %a1 to <16 x i8> call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2) @@ -3300,6 +3609,144 @@ ; X64-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] ; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_epi8: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X32-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X32-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X32-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10] +; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X32-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X32-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20] +; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: 
[0x67,0x0f,0xb6,0x44,0x24,0x28] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30] +; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X32-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] +; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40] +; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48] +; X32-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; X32-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X32-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_epi8: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb6,0x54,0x24,0x48] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50] +; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX1-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40] +; X32-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38] +; X32-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30] +; X32-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28] +; X32-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20] +; X32-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; 
X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18] +; X32-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10] +; X32-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08] +; X32-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X32-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X32-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X32-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X32-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X32-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X32-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X32-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X32-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X32-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X32-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X32-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X32-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set_epi8: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb6,0x54,0x24,0x48] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50] +; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX512-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40] +; X32-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38] +; X32-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30] +; X32-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28] +; X32-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20] +; X32-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18] +; X32-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10] +; X32-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X32-AVX512-NEXT: 
movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08] +; X32-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X32-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X32-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X32-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X32-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X32-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X32-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X32-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X32-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X32-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X32-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X32-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X32-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2 @@ -3450,6 +3897,62 @@ ; X64-AVX512-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06] ; X64-AVX512-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_epi16: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x10] +; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x08] +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce] +; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X32-SSE-NEXT: movd %edx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc2] +; X32-SSE-NEXT: movd %ecx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd1] +; X32-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; X32-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X32-SSE-NEXT: movd %r8d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movd %r9d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xc9] +; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X32-SSE-NEXT: movd %r10d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc2] +; X32-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; X32-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1] +; X32-SSE-NEXT: # xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_epi16: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10] +; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08] +; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX1-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01] +; X32-AVX1-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02] +; X32-AVX1-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03] +; X32-AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04] +; X32-AVX1-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x05] +; X32-AVX1-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x06] +; X32-AVX1-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set_epi16: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10] +; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08] +; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX512-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01] +; X32-AVX512-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02] +; X32-AVX512-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03] +; X32-AVX512-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] +; X32-AVX512-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x05] +; X32-AVX512-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06] +; X32-AVX512-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2 @@ -3528,6 +4031,36 @@ ; X64-AVX512-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02] ; X64-AVX512-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_epi32: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce] +; X32-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X32-SSE-NEXT: movd %edx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd2] +; X32-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] +; X32-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X32-SSE-NEXT: 
# xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_epi32: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] +; X32-AVX1-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01] +; X32-AVX1-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02] +; X32-AVX1-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set_epi32: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; X32-AVX512-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01] +; X32-AVX512-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02] +; X32-AVX512-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2 @@ -3598,6 +4131,30 @@ ; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] ; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_epi64x: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movq %rdi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xcf] +; X32-SSE-NEXT: movq %rsi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc6] +; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_epi64x: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: vmovq %rsi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xce] +; X32-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set_epi64x: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovq %rdi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] +; X32-AVX512-NEXT: vmovq %rsi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xce] +; X32-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1 ret <2 x i64> %res1 @@ -3652,6 +4209,25 @@ ; X64-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0] ; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlhps %xmm0, %xmm1 # encoding: [0x0f,0x16,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0] +; X32-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: 
test_mm_set_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a1, i32 0 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 ret <2 x double> %res1 @@ -3699,6 +4275,24 @@ ; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_pd1: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_pd1: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set_pd1: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a0, i32 0 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 ret <2 x double> %res1 @@ -3746,6 +4340,24 @@ ; X64-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0] ; X64-AVX512-NEXT: # xmm0 = xmm0[0],zero ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_sd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[0],zero +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_sd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm0[0],zero +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set_sd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm0[0],zero +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a0, i32 0 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1 ret <2 x double> %res1 @@ -3802,6 +4414,31 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set1_epi8: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X32-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] +; X32-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set1_epi8: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9] +; X32-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: 
[0xc4,0xe2,0x79,0x00,0xc1] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set1_epi8: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2 @@ -3871,6 +4508,29 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set1_epi16: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] +; X32-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set1_epi16: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] +; X32-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set1_epi16: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2 @@ -3924,6 +4584,25 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set1_epi32: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set1_epi32: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set1_epi32: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2 @@ -3982,6 +4661,25 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set1_epi64x: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44] +; X32-SSE-NEXT: # xmm0 = xmm0[0,1,0,1] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set1_epi64x: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: 
[0xc4,0xe1,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,1,0,1] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set1_epi64x: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1 ret <2 x i64> %res1 @@ -4029,6 +4727,24 @@ ; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set1_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set1_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set1_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a0, i32 0 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 ret <2 x double> %res1 @@ -4310,6 +5026,144 @@ ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] ; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_setr_epi8: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48] +; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38] +; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X32-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28] +; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X32-SSE-NEXT: # xmm3 = 
xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18] +; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X32-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X32-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08] +; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X32-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X32-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] +; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X32-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X32-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X32-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] +; X32-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; X32-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X32-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; 
X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_setr_epi8: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X32-AVX1-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7] +; X32-AVX1-NEXT: vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6] +; X32-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] +; X32-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X32-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X32-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X32-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X32-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X32-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X32-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X32-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08] +; X32-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10] +; X32-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18] +; X32-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20] +; X32-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28] +; X32-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30] +; X32-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38] +; X32-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40] +; X32-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48] +; X32-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50] +; X32-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_setr_epi8: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X32-AVX512-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7] +; X32-AVX512-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] +; X32-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] +; X32-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X32-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression 
encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X32-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X32-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X32-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X32-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X32-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X32-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08] +; X32-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10] +; X32-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18] +; X32-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20] +; X32-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28] +; X32-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30] +; X32-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38] +; X32-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40] +; X32-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48] +; X32-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50] +; X32-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2 @@ -4460,6 +5314,62 @@ ; X64-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] ; X64-AVX512-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_setr_epi16: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10] +; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: 
[0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movd %r10d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xca] +; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X32-SSE-NEXT: movd %r9d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc1] +; X32-SSE-NEXT: movd %r8d, %xmm2 # encoding: [0x66,0x41,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; X32-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X32-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] +; X32-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca] +; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X32-SSE-NEXT: movd %esi, %xmm3 # encoding: [0x66,0x0f,0x6e,0xde] +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; X32-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_setr_epi16: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x10] +; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x08] +; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x01] +; X32-AVX1-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x02] +; X32-AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03] +; X32-AVX1-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04] +; X32-AVX1-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05] +; X32-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] +; X32-AVX1-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_setr_epi16: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x10] +; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x08] +; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX512-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x01] +; X32-AVX512-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x02] +; X32-AVX512-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x03] +; X32-AVX512-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04] +; X32-AVX512-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05] +; X32-AVX512-NEXT: 
vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] +; X32-AVX512-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2 @@ -4538,6 +5448,36 @@ ; X64-AVX512-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] ; X64-AVX512-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_setr_epi32: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] +; X32-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca] +; X32-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X32-SSE-NEXT: movd %esi, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd6] +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_setr_epi32: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01] +; X32-AVX1-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] +; X32-AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_setr_epi32: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX512-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01] +; X32-AVX512-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] +; X32-AVX512-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1 %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2 @@ -4608,6 +5548,30 @@ ; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] ; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_setr_epi64x: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movq %rsi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xce] +; X32-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_setr_epi64x: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovq %rsi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc6] +; X32-AVX1-NEXT: vmovq %rdi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xcf] +; X32-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0] +; 
X32-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_setr_epi64x: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovq %rsi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc6] +; X32-AVX512-NEXT: vmovq %rdi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xcf] +; X32-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1 ret <2 x i64> %res1 @@ -4661,6 +5625,24 @@ ; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] ; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_setr_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_setr_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] +; X32-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_setr_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] +; X32-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a0, i32 0 %res1 = insertelement <2 x double> %res0, double %a1, i32 1 ret <2 x double> %res1 @@ -5053,6 +6035,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_sqrt_sd_scalar: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_sqrt_sd_scalar: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_sqrt_sd_scalar: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %sqrt = call double @llvm.sqrt.f64(double %a0) ret double %sqrt } @@ -5334,6 +6331,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_store_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_store_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_store_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* store <2 x double> %a1, <2 x double>* %arg0, align 16 ret void @@ -5384,6 +6396,27 @@ ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) 
# EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_store_pd1: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0] +; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_store_pd1: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_store_pd1: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double * %a0 to <2 x double>* %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer store <2 x double> %shuf, <2 x double>* %arg0, align 16 @@ -5423,6 +6456,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_store_sd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movsd %xmm0, (%edi) # encoding: [0x67,0xf2,0x0f,0x11,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_store_sd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovsd %xmm0, (%edi) # encoding: [0x67,0xc5,0xfb,0x11,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_store_sd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovsd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x11,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ext = extractelement <2 x double> %a1, i32 0 store double %ext, double* %a0, align 1 ret void @@ -5461,6 +6509,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_store_si128: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_store_si128: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_store_si128: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] store <2 x i64> %a1, <2 x i64>* %a0, align 16 ret void } @@ -5510,6 +6573,27 @@ ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_store1_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0] +; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_store1_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0] +; 
X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_store1_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double * %a0 to <2 x double>* %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer store <2 x double> %shuf, <2 x double>* %arg0, align 16 @@ -5561,6 +6645,27 @@ ; X64-AVX512-NEXT: # xmm0 = xmm0[1,0] ; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storeh_sd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[1,1] +; X32-SSE-NEXT: movsd %xmm0, (%edi) # encoding: [0x67,0xf2,0x0f,0x11,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storeh_sd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X32-AVX1-NEXT: # xmm0 = xmm0[1,0] +; X32-AVX1-NEXT: vmovsd %xmm0, (%edi) # encoding: [0x67,0xc5,0xfb,0x11,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storeh_sd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X32-AVX512-NEXT: # xmm0 = xmm0[1,0] +; X32-AVX512-NEXT: vmovsd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x11,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ext = extractelement <2 x double> %a1, i32 1 store double %ext, double* %a0, align 8 ret void @@ -5602,6 +6707,24 @@ ; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] ; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storel_epi64: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0] +; X32-SSE-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storel_epi64: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] +; X32-AVX1-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storel_epi64: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] +; X32-AVX512-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ext = extractelement <2 x i64> %a1, i32 0 %bc = bitcast <2 x i64> *%a0 to i64* store i64 %ext, i64* %bc, align 8 @@ -5641,6 +6764,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storel_sd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movsd %xmm0, (%edi) # encoding: [0x67,0xf2,0x0f,0x11,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storel_sd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovsd %xmm0, (%edi) # encoding: 
[0x67,0xc5,0xfb,0x11,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storel_sd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovsd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x11,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ext = extractelement <2 x double> %a1, i32 0 store double %ext, double* %a0, align 8 ret void @@ -5691,6 +6829,27 @@ ; X64-AVX512-NEXT: # xmm0 = xmm0[1,0] ; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storer_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e] +; X32-SSE-NEXT: # xmm0 = xmm0[2,3,0,1] +; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storer_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X32-AVX1-NEXT: # xmm0 = xmm0[1,0] +; X32-AVX1-NEXT: vmovapd %xmm0, (%edi) # encoding: [0x67,0xc5,0xf9,0x29,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storer_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X32-AVX512-NEXT: # xmm0 = xmm0[1,0] +; X32-AVX512-NEXT: vmovapd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf9,0x29,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> store <2 x double> %shuf, <2 x double>* %arg0, align 16 @@ -5730,6 +6889,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storeu_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movups %xmm0, (%edi) # encoding: [0x67,0x0f,0x11,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storeu_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovups %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x11,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storeu_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovups %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x11,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* store <2 x double> %a1, <2 x double>* %arg0, align 1 ret void @@ -5768,6 +6942,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storeu_si128: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movups %xmm0, (%edi) # encoding: [0x67,0x0f,0x11,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storeu_si128: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovups %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x11,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storeu_si128: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovups %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x11,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] store <2 x i64> %a1, <2 x i64>* %a0, align 1 ret void } @@ -5808,6 +6997,24 @@ ; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] ; 
X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storeu_si64: +; X32-SSE: # %bb.0: # %entry +; X32-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0] +; X32-SSE-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storeu_si64: +; X32-AVX1: # %bb.0: # %entry +; X32-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] +; X32-AVX1-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storeu_si64: +; X32-AVX512: # %bb.0: # %entry +; X32-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] +; X32-AVX512-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] entry: %vecext.i = extractelement <2 x i64> %B, i32 0 %__v.i = bitcast i8* %A to i64* @@ -5854,6 +7061,24 @@ ; X64-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0] ; X64-AVX512-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storeu_si32: +; X32-SSE: # %bb.0: # %entry +; X32-SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0] +; X32-SSE-NEXT: movl %eax, (%edi) # encoding: [0x67,0x89,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storeu_si32: +; X32-AVX1: # %bb.0: # %entry +; X32-AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0] +; X32-AVX1-NEXT: movl %eax, (%edi) # encoding: [0x67,0x89,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storeu_si32: +; X32-AVX512: # %bb.0: # %entry +; X32-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0] +; X32-AVX512-NEXT: movl %eax, (%edi) # encoding: [0x67,0x89,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] entry: %0 = bitcast <2 x i64> %B to <4 x i32> %vecext.i = extractelement <4 x i32> %0, i32 0 @@ -5901,6 +7126,24 @@ ; X64-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0] ; X64-AVX512-NEXT: movw %ax, (%rdi) # encoding: [0x66,0x89,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storeu_si16: +; X32-SSE: # %bb.0: # %entry +; X32-SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0] +; X32-SSE-NEXT: movw %ax, (%edi) # encoding: [0x67,0x66,0x89,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storeu_si16: +; X32-AVX1: # %bb.0: # %entry +; X32-AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0] +; X32-AVX1-NEXT: movw %ax, (%edi) # encoding: [0x67,0x66,0x89,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storeu_si16: +; X32-AVX512: # %bb.0: # %entry +; X32-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0] +; X32-AVX512-NEXT: movw %ax, (%edi) # encoding: [0x67,0x66,0x89,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] entry: %0 = bitcast <2 x i64> %B to <8 x i16> %vecext.i = extractelement <8 x i16> %0, i32 0 @@ -5942,6 +7185,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_stream_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movntps %xmm0, (%edi) # encoding: [0x67,0x0f,0x2b,0x07] +; X32-SSE-NEXT: 
retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_stream_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovntps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x2b,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_stream_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovntps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x2b,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
   %arg0 = bitcast double* %a0 to <2 x double>*
   store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
   ret void
@@ -5959,6 +7217,11 @@
 ; X64: # %bb.0:
 ; X64-NEXT: movntil %esi, (%rdi) # encoding: [0x0f,0xc3,0x37]
 ; X64-NEXT: retq # encoding: [0xc3]
+;
+; X32-LABEL: test_mm_stream_si32:
+; X32: # %bb.0:
+; X32-NEXT: movntil %esi, (%edi) # encoding: [0x67,0x0f,0xc3,0x37]
+; X32-NEXT: retq # encoding: [0xc3]
   store i32 %a1, i32* %a0, align 1, !nontemporal !0
   ret void
 }
@@ -5996,6 +7259,21 @@
 ; X64-AVX512: # %bb.0:
 ; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_stream_si128:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movntps %xmm0, (%edi) # encoding: [0x67,0x0f,0x2b,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_stream_si128:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovntps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x2b,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_stream_si128:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovntps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x2b,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
   store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
   ret void
 }
diff --git a/llvm/test/MC/X86/maskmovdqu.s b/llvm/test/MC/X86/maskmovdqu.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/X86/maskmovdqu.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -triple i386-- --show-encoding %s |\
+// RUN: FileCheck %s --check-prefixes=CHECK,ENCODING
+
+// RUN: llvm-mc -triple i386-- -filetype=obj %s |\
+// RUN: llvm-objdump -d - | FileCheck %s
+
+// CHECK-NOT: addr32
+// CHECK: maskmovdqu %xmm1, %xmm0
+// ENCODING: encoding: [0x66,0x0f,0xf7,0xc1]
+maskmovdqu %xmm1, %xmm0
+
+// CHECK-NOT: addr32
+// CHECK: vmaskmovdqu %xmm1, %xmm0
+// ENCODING: encoding: [0xc5,0xf9,0xf7,0xc1]
+vmaskmovdqu %xmm1, %xmm0
diff --git a/llvm/test/MC/X86/maskmovdqu64.s b/llvm/test/MC/X86/maskmovdqu64.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/MC/X86/maskmovdqu64.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple x86_64-- --show-encoding %s |\
+// RUN: FileCheck %s --check-prefixes=CHECK,ENCODING
+
+// RUN: llvm-mc -triple x86_64-- -filetype=obj %s |\
+// RUN: llvm-objdump -d - | FileCheck %s
+
+// CHECK-NOT: addr32
+// CHECK: maskmovdqu %xmm1, %xmm0
+// ENCODING: encoding: [0x66,0x0f,0xf7,0xc1]
+maskmovdqu %xmm1, %xmm0
+
+// CHECK-NOT: addr32
+// CHECK: vmaskmovdqu %xmm1, %xmm0
+// ENCODING: encoding: [0xc5,0xf9,0xf7,0xc1]
+vmaskmovdqu %xmm1, %xmm0
+
+// CHECK: addr32
+// ENCODING: encoding: [0x67]
+// CHECK: maskmovdqu %xmm1, %xmm0
+// ENCODING: encoding: [0x66,0x0f,0xf7,0xc1]
+addr32 maskmovdqu %xmm1, %xmm0
+
+// CHECK: addr32
+// ENCODING: encoding: [0x67]
+// CHECK: vmaskmovdqu %xmm1, %xmm0
+// ENCODING: encoding: [0xc5,0xf9,0xf7,0xc1]
+addr32 vmaskmovdqu %xmm1, %xmm0
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -102,7 +102,8 @@
   case IC_64BIT_ADSIZE:
     return (noPrefix && inheritsFrom(child, IC_64BIT_OPSIZE_ADSIZE, noPrefix));
   case IC_64BIT_OPSIZE_ADSIZE:
-    return false;
+    return (noPrefix &&
+            inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE, noPrefix));
   case IC_XD:
     return inheritsFrom(child, IC_64BIT_XD);
   case IC_XS:
@@ -123,10 +124,11 @@
   case IC_64BIT_OPSIZE:
     return inheritsFrom(child, IC_64BIT_REXW_OPSIZE) ||
            (!AdSize64 && inheritsFrom(child, IC_64BIT_OPSIZE_ADSIZE)) ||
-           (!AdSize64 && inheritsFrom(child, IC_64BIT_REXW_ADSIZE));
+           (!AdSize64 && inheritsFrom(child, IC_64BIT_REXW_ADSIZE)) ||
+           (!AdSize64 && inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE));
   case IC_64BIT_XD:
-    return(inheritsFrom(child, IC_64BIT_REXW_XD) ||
-           (!AdSize64 && inheritsFrom(child, IC_64BIT_XD_ADSIZE)));
+    return (inheritsFrom(child, IC_64BIT_REXW_XD) ||
+            (!AdSize64 && inheritsFrom(child, IC_64BIT_XD_ADSIZE)));
   case IC_64BIT_XS:
     return(inheritsFrom(child, IC_64BIT_REXW_XS) ||
            (!AdSize64 && inheritsFrom(child, IC_64BIT_XS_ADSIZE)));
@@ -156,7 +158,12 @@
   case IC_VEX_OPSIZE:
     return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE)) ||
            (VEX_WIG && inheritsFrom(child, IC_VEX_W_OPSIZE)) ||
-           (VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE));
+           (VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE)) ||
+           inheritsFrom(child, IC_64BIT_VEX_OPSIZE);
+  case IC_64BIT_VEX_OPSIZE:
+    return inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE);
+  case IC_64BIT_VEX_OPSIZE_ADSIZE:
+    return false;
   case IC_VEX_W:
     return VEX_LIG && inheritsFrom(child, IC_VEX_L_W);
   case IC_VEX_W_XS:
@@ -881,6 +888,9 @@
   if ((index & ATTR_EVEX) || (index & ATTR_VEX) || (index & ATTR_VEXL)) {
     if (index & ATTR_EVEX)
       o << "IC_EVEX";
+    else if ((index & (ATTR_64BIT | ATTR_VEXL | ATTR_REXW | ATTR_OPSIZE)) ==
+             (ATTR_64BIT | ATTR_OPSIZE))
+      o << "IC_64BIT_VEX";
     else
       o << "IC_VEX";
 
@@ -892,9 +902,13 @@
     if (index & ATTR_REXW)
       o << "_W";
 
-    if (index & ATTR_OPSIZE)
+    if (index & ATTR_OPSIZE) {
       o << "_OPSIZE";
-    else if (index & ATTR_XD)
+      if ((index & (ATTR_64BIT | ATTR_EVEX | ATTR_VEX | ATTR_VEXL |
+                    ATTR_REXW | ATTR_ADSIZE)) ==
+          (ATTR_64BIT | ATTR_VEX | ATTR_ADSIZE))
+        o << "_ADSIZE";
+    } else if (index & ATTR_XD)
       o << "_XD";
     else if (index & ATTR_XS)
       o << "_XS";
@@ -908,8 +922,7 @@
       if (index & ATTR_EVEXB)
         o << "_B";
     }
-  }
-  else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
+  } else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
     o << "IC_64BIT_REXW_XS";
   else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD))
     o << "IC_64BIT_REXW_XD";
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -125,13 +125,7 @@
     return;
   }
 
-  // Special case since there is no attribute class for 64-bit and VEX
-  if (Name == "VMASKMOVDQU64") {
-    ShouldBeEmitted = false;
-    return;
-  }
-
-  ShouldBeEmitted = true;
+  ShouldBeEmitted = true;
 }
 
 void RecognizableInstr::processInstr(DisassemblerTables &tables,
@@ -267,6 +261,11 @@
       insnContext = IC_VEX_L_OPSIZE;
     else if (OpPrefix == X86Local::PD && HasVEX_W)
       insnContext = IC_VEX_W_OPSIZE;
+    else if (OpPrefix == X86Local::PD && Is64Bit &&
+             AdSize == X86Local::AdSize32)
+      insnContext = IC_64BIT_VEX_OPSIZE_ADSIZE;
+    else if (OpPrefix == X86Local::PD && Is64Bit)
+      insnContext = IC_64BIT_VEX_OPSIZE;
     else if (OpPrefix == X86Local::PD)
       insnContext = IC_VEX_OPSIZE;
     else if (HasVEX_LPrefix && OpPrefix == X86Local::XS)