diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
--- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -116,6 +116,7 @@
   ENUM_ENTRY(IC_VEX_XS,             2,  "requires VEX and the XS prefix")      \
   ENUM_ENTRY(IC_VEX_XD,             2,  "requires VEX and the XD prefix")      \
   ENUM_ENTRY(IC_VEX_OPSIZE,         2,  "requires VEX and the OpSize prefix")  \
+  ENUM_ENTRY(IC_VEX_OPSIZE_ADSIZE,  3,  "requires VEX and the OpSize and AdSize prefix")\
   ENUM_ENTRY(IC_VEX_W,              3,  "requires VEX and the W prefix")       \
   ENUM_ENTRY(IC_VEX_W_XS,           4,  "requires VEX, W, and XS prefix")      \
   ENUM_ENTRY(IC_VEX_W_XD,           4,  "requires VEX, W, and XD prefix")      \
diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -1175,6 +1175,8 @@
   case 0x66:
     if (insn->mode != MODE_16BIT)
       attrMask |= ATTR_OPSIZE;
+    if (insn->hasAdSize)
+      attrMask |= ATTR_ADSIZE;
     break;
   case 0x67:
     attrMask |= ATTR_ADSIZE;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4011,7 +4011,15 @@
                            (ins VR128:$src, VR128:$mask),
                            "maskmovdqu\t{$mask, $src|$src, $mask}",
                            [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
-                           VEX, VEX_WIG;
+                           VEX, VEX_WIG, AdSize64;
+let Uses = [EDI], Predicates = [HasAVX,In64BitMode] in
+def VMASKMOVDQUX32 : VPDI<0xF7, MRMSrcReg, (outs),
+                          (ins VR128:$src, VR128:$mask), "",
+                          [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
+                          VEX, VEX_WIG, AdSize32 {
+  let AsmString = "addr32 vmaskmovdqu\t{$mask, $src|$src, $mask}";
+  let AsmVariantName = "NonParsable";
+}
 
 let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
 def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
@@ -4020,7 +4028,15 @@
 let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
 def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
                        "maskmovdqu\t{$mask, $src|$src, $mask}",
-                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
+                       AdSize64;
+let Uses = [EDI], Predicates = [UseSSE2,In64BitMode] in
+def MASKMOVDQUX32 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+                        "addr32 maskmovdqu\t{$mask, $src|$src, $mask}",
+                        [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
+                        AdSize32 {
+  let AsmVariantName = "NonParsable";
+}
 
 } // ExeDomain = SSEPackedInt
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -834,8 +834,8 @@
   let ResourceCycles = [1, 1, 2, 2, 2, 16, 42];
   let NumMicroOps = 63;
 }
-def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64,
-                                         VMASKMOVDQU, VMASKMOVDQU64)>;
+def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64, MASKMOVDQUX32,
+                                         VMASKMOVDQU, VMASKMOVDQU64, VMASKMOVDQUX32)>;
 
 ///////////////////////////////////////////////////////////////////////////////
 // SchedWriteVariant definitions.
diff --git a/llvm/test/CodeGen/X86/maskmovdqu.ll b/llvm/test/CodeGen/X86/maskmovdqu.ll
--- a/llvm/test/CodeGen/X86/maskmovdqu.ll
+++ b/llvm/test/CodeGen/X86/maskmovdqu.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-- -mattr=+sse2,-avx | FileCheck %s --check-prefix=i686_SSE2
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2,-avx | FileCheck %s --check-prefix=x86_64_SSE2
+; RUN: llc < %s -mtriple=x86_64--gnux32 -mattr=+sse2,-avx | FileCheck %s --check-prefix=x86_x32_SSE2
 ; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefix=i686_AVX
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=x86_64_AVX
+; RUN: llc < %s -mtriple=x86_64--gnux32 -mattr=+avx | FileCheck %s --check-prefix=x86_x32_AVX
 ; rdar://6573467
 
 define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
@@ -20,6 +22,13 @@
 ; x86_64_SSE2-NEXT:    maskmovdqu %xmm1, %xmm0
 ; x86_64_SSE2-NEXT:    retq
 ;
+; x86_x32_SSE2-LABEL: test:
+; x86_x32_SSE2:       # %bb.0: # %entry
+; x86_x32_SSE2-NEXT:    movq %rsi, %rdi
+; x86_x32_SSE2-NEXT:    # kill: def $edi killed $edi killed $rdi
+; x86_x32_SSE2-NEXT:    addr32 maskmovdqu %xmm1, %xmm0
+; x86_x32_SSE2-NEXT:    retq
+;
 ; i686_AVX-LABEL: test:
 ; i686_AVX:       # %bb.0: # %entry
 ; i686_AVX-NEXT:    pushl %edi
@@ -33,6 +42,13 @@
 ; x86_64_AVX-NEXT:    movq %rsi, %rdi
 ; x86_64_AVX-NEXT:    vmaskmovdqu %xmm1, %xmm0
 ; x86_64_AVX-NEXT:    retq
+;
+; x86_x32_AVX-LABEL: test:
+; x86_x32_AVX:       # %bb.0: # %entry
+; x86_x32_AVX-NEXT:    movq %rsi, %rdi
+; x86_x32_AVX-NEXT:    # kill: def $edi killed $edi killed $rdi
+; x86_x32_AVX-NEXT:    addr32 vmaskmovdqu %xmm1, %xmm0
+; x86_x32_AVX-NEXT:    retq
 entry:
   tail call void @llvm.x86.sse2.maskmov.dqu( <16 x i8> %a, <16 x i8> %b, i8* %c )
   ret void
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -5,6 +5,9 @@
 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
 ; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown-gnux32 -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X32,SSE,X32-SSE
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown-gnux32 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X32,AVX,X32-AVX,AVX1,X32-AVX1
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown-gnux32 -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X32,AVX,X32-AVX,AVX512,X32-AVX512
 
 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
 
@@ -473,6 +476,11 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    clflush (%rdi) # encoding: [0x0f,0xae,0x3f]
 ; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X32-LABEL: test_mm_clflush:
+; X32:       # %bb.0:
+; X32-NEXT:    clflush (%edi) # encoding: [0x67,0x0f,0xae,0x3f]
+; X32-NEXT:    retq # encoding: [0xc3]
   call void @llvm.x86.sse2.clflush(i8* %a0)
   ret void
 }
@@ -1497,6 +1505,10 @@
 ; X64-LABEL: test_mm_cvtsd_f64:
 ; X64:       # %bb.0:
; X64-NEXT: retq # encoding: [0xc3] +; +; X32-LABEL: test_mm_cvtsd_f64: +; X32: # %bb.0: +; X32-NEXT: retq # encoding: [0xc3] %res = extractelement <2 x double> %a0, i32 0 ret double %res } @@ -1574,6 +1586,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_cvtsd_ss_load: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: cvtsd2ss (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x5a,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_cvtsd_ss_load: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vcvtsd2ss (%edi), %xmm0, %xmm0 # encoding: [0x67,0xc5,0xfb,0x5a,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_cvtsd_ss_load: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vcvtsd2ss (%edi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x5a,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %a1 = load <2 x double>, <2 x double>* %p1 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ret <4 x float> %res @@ -1629,6 +1656,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_cvtsi32_sd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: cvtsi2sd %edi, %xmm0 # encoding: [0xf2,0x0f,0x2a,0xc7] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_cvtsi32_sd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0xc7] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_cvtsi32_sd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %cvt = sitofp i32 %a1 to double %res = insertelement <2 x double> %a0, double %cvt, i32 0 ret <2 x double> %res @@ -1667,6 +1709,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_cvtsi32_si128: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_cvtsi32_si128: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_cvtsi32_si128: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1 %res2 = insertelement <4 x i32> %res1, i32 0, i32 2 @@ -1856,6 +1913,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_insert_epi16: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: pinsrw $1, %edi, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc7,0x01] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_insert_epi16: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x01] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: 
test_mm_insert_epi16: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1 %bc = bitcast <8 x i16> %res to <2 x i64> @@ -1905,6 +1977,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_load_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movaps (%edi), %xmm0 # encoding: [0x67,0x0f,0x28,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_load_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovaps (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x28,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_load_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovaps (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x28,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* %res = load <2 x double>, <2 x double>* %arg0, align 16 ret <2 x double> %res @@ -1949,6 +2036,24 @@ ; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] ; X64-AVX512-NEXT: # xmm0 = mem[0],zero ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_load_sd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movsd (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x10,0x07] +; X32-SSE-NEXT: # xmm0 = mem[0],zero +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_load_sd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovsd (%edi), %xmm0 # encoding: [0x67,0xc5,0xfb,0x10,0x07] +; X32-AVX1-NEXT: # xmm0 = mem[0],zero +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_load_sd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovsd (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x10,0x07] +; X32-AVX512-NEXT: # xmm0 = mem[0],zero +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, double* %a0, align 1 %res0 = insertelement <2 x double> undef, double %ld, i32 0 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1 @@ -1988,6 +2093,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_load_si128: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movaps (%edi), %xmm0 # encoding: [0x67,0x0f,0x28,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_load_si128: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovaps (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x28,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_load_si128: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovaps (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x28,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res = load <2 x i64>, <2 x i64>* %a0, align 16 ret <2 x i64> %res } @@ -2035,6 +2155,26 @@ ; X64-AVX512-NEXT: vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07] ; X64-AVX512-NEXT: # xmm0 = mem[0,0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_load1_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movsd (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x10,0x07] +; X32-SSE-NEXT: # xmm0 = mem[0],zero +; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; 
X32-SSE-NEXT: # xmm0 = xmm0[0,0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_load1_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovddup (%edi), %xmm0 # encoding: [0x67,0xc5,0xfb,0x12,0x07] +; X32-AVX1-NEXT: # xmm0 = mem[0,0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_load1_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovddup (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x12,0x07] +; X32-AVX512-NEXT: # xmm0 = mem[0,0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, double* %a0, align 8 %res0 = insertelement <2 x double> undef, double %ld, i32 0 %res1 = insertelement <2 x double> %res0, double %ld, i32 1 @@ -2080,6 +2220,24 @@ ; X64-AVX512-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x07] ; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadh_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movhps (%edi), %xmm0 # encoding: [0x67,0x0f,0x16,0x07] +; X32-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadh_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovhps (%edi), %xmm0, %xmm0 # encoding: [0x67,0xc5,0xf8,0x16,0x07] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadh_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovhps (%edi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x16,0x07] +; X32-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, double* %a1, align 8 %res = insertelement <2 x double> %a0, double %ld, i32 1 ret <2 x double> %res @@ -2124,6 +2282,24 @@ ; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] ; X64-AVX512-NEXT: # xmm0 = mem[0],zero ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadl_epi64: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movsd (%edi), %xmm0 # encoding: [0x67,0xf2,0x0f,0x10,0x07] +; X32-SSE-NEXT: # xmm0 = mem[0],zero +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadl_epi64: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovsd (%edi), %xmm0 # encoding: [0x67,0xc5,0xfb,0x10,0x07] +; X32-AVX1-NEXT: # xmm0 = mem[0],zero +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadl_epi64: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovsd (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x10,0x07] +; X32-AVX512-NEXT: # xmm0 = mem[0],zero +; X32-AVX512-NEXT: retq # encoding: [0xc3] %bc = bitcast <2 x i64>* %a1 to i64* %ld = load i64, i64* %bc, align 1 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0 @@ -2170,6 +2346,24 @@ ; X64-AVX512-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x07] ; X64-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadl_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlps (%edi), %xmm0 # encoding: [0x67,0x0f,0x12,0x07] +; X32-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadl_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovlps (%edi), %xmm0, %xmm0 # encoding: [0x67,0xc5,0xf8,0x12,0x07] +; X32-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadl_pd: +; X32-AVX512: # %bb.0: +; 
X32-AVX512-NEXT: vmovlps (%edi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x12,0x07] +; X32-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, double* %a1, align 8 %res = insertelement <2 x double> %a0, double %ld, i32 0 ret <2 x double> %res @@ -2216,6 +2410,25 @@ ; X64-AVX512-NEXT: vpermilpd $1, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01] ; X64-AVX512-NEXT: # xmm0 = mem[1,0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadr_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movaps (%edi), %xmm0 # encoding: [0x67,0x0f,0x28,0x07] +; X32-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e] +; X32-SSE-NEXT: # xmm0 = xmm0[2,3,0,1] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadr_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vpermilpd $1, (%edi), %xmm0 # encoding: [0x67,0xc4,0xe3,0x79,0x05,0x07,0x01] +; X32-AVX1-NEXT: # xmm0 = mem[1,0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadr_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpermilpd $1, (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc4,0xe3,0x79,0x05,0x07,0x01] +; X32-AVX512-NEXT: # xmm0 = mem[1,0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* %ld = load <2 x double>, <2 x double>* %arg0, align 16 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> @@ -2255,6 +2468,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadu_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movups (%edi), %xmm0 # encoding: [0x67,0x0f,0x10,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadu_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovups (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x10,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadu_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovups (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x10,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* %res = load <2 x double>, <2 x double>* %arg0, align 1 ret <2 x double> %res @@ -2293,6 +2521,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadu_si128: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movups (%edi), %xmm0 # encoding: [0x67,0x0f,0x10,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadu_si128: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovups (%edi), %xmm0 # encoding: [0x67,0xc5,0xf8,0x10,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadu_si128: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovups (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x10,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res = load <2 x i64>, <2 x i64>* %a0, align 1 ret <2 x i64> %res } @@ -2336,6 +2579,24 @@ ; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] ; X64-AVX512-NEXT: # xmm0 = mem[0],zero ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadu_si64: +; X32-SSE: # %bb.0: # %entry +; X32-SSE-NEXT: movsd (%edi), %xmm0 # encoding: 
[0x67,0xf2,0x0f,0x10,0x07] +; X32-SSE-NEXT: # xmm0 = mem[0],zero +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadu_si64: +; X32-AVX1: # %bb.0: # %entry +; X32-AVX1-NEXT: vmovsd (%edi), %xmm0 # encoding: [0x67,0xc5,0xfb,0x10,0x07] +; X32-AVX1-NEXT: # xmm0 = mem[0],zero +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadu_si64: +; X32-AVX512: # %bb.0: # %entry +; X32-AVX512-NEXT: vmovsd (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x10,0x07] +; X32-AVX512-NEXT: # xmm0 = mem[0],zero +; X32-AVX512-NEXT: retq # encoding: [0xc3] entry: %__v.i = bitcast i8* %A to i64* %0 = load i64, i64* %__v.i, align 1 @@ -2382,6 +2643,24 @@ ; X64-AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] ; X64-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadu_si32: +; X32-SSE: # %bb.0: # %entry +; X32-SSE-NEXT: movss (%edi), %xmm0 # encoding: [0x67,0xf3,0x0f,0x10,0x07] +; X32-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadu_si32: +; X32-AVX1: # %bb.0: # %entry +; X32-AVX1-NEXT: vmovss (%edi), %xmm0 # encoding: [0x67,0xc5,0xfa,0x10,0x07] +; X32-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadu_si32: +; X32-AVX512: # %bb.0: # %entry +; X32-AVX512-NEXT: vmovss (%edi), %xmm0 # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfa,0x10,0x07] +; X32-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero +; X32-AVX512-NEXT: retq # encoding: [0xc3] entry: %__v.i = bitcast i8* %A to i32* %0 = load i32, i32* %__v.i, align 1 @@ -2429,6 +2708,24 @@ ; X64-AVX512-NEXT: movzwl (%rdi), %eax # encoding: [0x0f,0xb7,0x07] ; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_loadu_si16: +; X32-SSE: # %bb.0: # %entry +; X32-SSE-NEXT: movzwl (%edi), %eax # encoding: [0x67,0x0f,0xb7,0x07] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_loadu_si16: +; X32-AVX1: # %bb.0: # %entry +; X32-AVX1-NEXT: movzwl (%edi), %eax # encoding: [0x67,0x0f,0xb7,0x07] +; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_loadu_si16: +; X32-AVX512: # %bb.0: # %entry +; X32-AVX512-NEXT: movzwl (%edi), %eax # encoding: [0x67,0x0f,0xb7,0x07] +; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] entry: %__v.i = bitcast i8* %A to i16* %0 = load i16, i16* %__v.i, align 1 @@ -2486,6 +2783,18 @@ ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1] ; X64-AVX-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_maskmoveu_si128: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: # kill: def $edi killed $edi killed $rdi +; X32-SSE-NEXT: addr32 maskmovdqu %xmm1, %xmm0 # encoding: [0x67,0x66,0x0f,0xf7,0xc1] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX-LABEL: test_mm_maskmoveu_si128: +; X32-AVX: # %bb.0: +; X32-AVX-NEXT: # kill: def $edi killed $edi killed $rdi +; X32-AVX-NEXT: addr32 vmaskmovdqu %xmm1, %xmm0 # encoding: [0x67,0xc5,0xf9,0xf7,0xc1] +; X32-AVX-NEXT: retq # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x 
i64> %a1 to <16 x i8> call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2) @@ -3300,6 +3609,144 @@ ; X64-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] ; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_epi8: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X32-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X32-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X32-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10] +; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X32-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X32-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20] +; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: 
[0x67,0x0f,0xb6,0x44,0x24,0x28] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30] +; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X32-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] +; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40] +; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48] +; X32-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; X32-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X32-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_epi8: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb6,0x54,0x24,0x48] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50] +; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX1-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40] +; X32-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38] +; X32-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30] +; X32-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28] +; X32-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20] +; X32-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; 
X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18] +; X32-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10] +; X32-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08] +; X32-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X32-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X32-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X32-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X32-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X32-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X32-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X32-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X32-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X32-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X32-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X32-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X32-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set_epi8: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb6,0x54,0x24,0x48] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50] +; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX512-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40] +; X32-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38] +; X32-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30] +; X32-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28] +; X32-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20] +; X32-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18] +; X32-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10] +; X32-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X32-AVX512-NEXT: 
movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08] +; X32-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X32-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X32-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X32-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X32-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X32-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X32-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X32-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X32-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X32-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X32-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X32-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X32-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2 @@ -3450,6 +3897,62 @@ ; X64-AVX512-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06] ; X64-AVX512-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_epi16: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x10] +; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x08] +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce] +; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X32-SSE-NEXT: movd %edx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc2] +; X32-SSE-NEXT: movd %ecx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd1] +; X32-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; X32-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X32-SSE-NEXT: movd %r8d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movd %r9d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xc9] +; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X32-SSE-NEXT: movd %r10d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc2] +; X32-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; X32-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1] +; X32-SSE-NEXT: # xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_epi16: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10] +; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08] +; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX1-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01] +; X32-AVX1-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02] +; X32-AVX1-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03] +; X32-AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04] +; X32-AVX1-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x05] +; X32-AVX1-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x06] +; X32-AVX1-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set_epi16: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10] +; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08] +; X32-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX512-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01] +; X32-AVX512-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02] +; X32-AVX512-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03] +; X32-AVX512-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] +; X32-AVX512-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x05] +; X32-AVX512-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06] +; X32-AVX512-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2 @@ -3528,6 +4031,36 @@ ; X64-AVX512-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02] ; X64-AVX512-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_epi32: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce] +; X32-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X32-SSE-NEXT: movd %edx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd2] +; X32-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] +; X32-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X32-SSE-NEXT: 
# xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_epi32: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] +; X32-AVX1-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01] +; X32-AVX1-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02] +; X32-AVX1-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set_epi32: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; X32-AVX512-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01] +; X32-AVX512-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02] +; X32-AVX512-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2 @@ -3598,6 +4131,30 @@ ; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] ; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_epi64x: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movq %rdi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xcf] +; X32-SSE-NEXT: movq %rsi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc6] +; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_epi64x: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: vmovq %rsi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xce] +; X32-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set_epi64x: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovq %rdi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] +; X32-AVX512-NEXT: vmovq %rsi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xce] +; X32-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1 ret <2 x i64> %res1 @@ -3652,6 +4209,25 @@ ; X64-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0] ; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlhps %xmm0, %xmm1 # encoding: [0x0f,0x16,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0] +; X32-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: 
test_mm_set_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a1, i32 0 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 ret <2 x double> %res1 @@ -3699,6 +4275,24 @@ ; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_pd1: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_pd1: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set_pd1: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a0, i32 0 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 ret <2 x double> %res1 @@ -3746,6 +4340,24 @@ ; X64-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0] ; X64-AVX512-NEXT: # xmm0 = xmm0[0],zero ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set_sd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[0],zero +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set_sd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm0[0],zero +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set_sd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm0[0],zero +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a0, i32 0 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1 ret <2 x double> %res1 @@ -3802,6 +4414,31 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set1_epi8: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X32-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] +; X32-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set1_epi8: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X32-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X32-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9] +; X32-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: 
[0xc4,0xe2,0x79,0x00,0xc1] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set1_epi8: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2 @@ -3871,6 +4508,29 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set1_epi16: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] +; X32-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set1_epi16: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] +; X32-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set1_epi16: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2 @@ -3924,6 +4584,25 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set1_epi32: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set1_epi32: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set1_epi32: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2 @@ -3982,6 +4661,25 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set1_epi64x: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44] +; X32-SSE-NEXT: # xmm0 = xmm0[0,1,0,1] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set1_epi64x: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: 
[0xc4,0xe1,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,1,0,1] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set1_epi64x: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1 ret <2 x i64> %res1 @@ -4029,6 +4727,24 @@ ; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_set1_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_set1_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_set1_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a0, i32 0 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 ret <2 x double> %res1 @@ -4310,6 +5026,144 @@ ; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] ; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_setr_epi8: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48] +; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38] +; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X32-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28] +; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X32-SSE-NEXT: # xmm3 = 
xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18] +; X32-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X32-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X32-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08] +; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X32-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X32-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X32-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] +; X32-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X32-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X32-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X32-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X32-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] +; X32-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; X32-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X32-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; 
X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_setr_epi8: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X32-AVX1-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7] +; X32-AVX1-NEXT: vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6] +; X32-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] +; X32-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X32-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X32-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X32-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X32-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X32-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X32-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X32-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08] +; X32-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10] +; X32-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18] +; X32-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20] +; X32-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28] +; X32-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30] +; X32-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38] +; X32-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40] +; X32-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48] +; X32-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X32-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50] +; X32-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_setr_epi8: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X32-AVX512-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7] +; X32-AVX512-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] +; X32-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] +; X32-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X32-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression 
encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X32-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X32-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X32-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X32-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X32-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X32-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x08] +; X32-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x10] +; X32-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x18] +; X32-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x20] +; X32-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x28] +; X32-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x30] +; X32-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x38] +; X32-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x40] +; X32-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x48] +; X32-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X32-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb6,0x44,0x24,0x50] +; X32-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2 @@ -4460,6 +5314,62 @@ ; X64-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] ; X64-AVX512-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_setr_epi16: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x10] +; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x08] +; X32-SSE-NEXT: movd %eax, %xmm0 # encoding: 
[0x66,0x0f,0x6e,0xc0] +; X32-SSE-NEXT: movd %r10d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xca] +; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X32-SSE-NEXT: movd %r9d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc1] +; X32-SSE-NEXT: movd %r8d, %xmm2 # encoding: [0x66,0x41,0x0f,0x6e,0xd0] +; X32-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; X32-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1] +; X32-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X32-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] +; X32-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca] +; X32-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X32-SSE-NEXT: movd %esi, %xmm3 # encoding: [0x66,0x0f,0x6e,0xde] +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; X32-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_setr_epi16: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x10] +; X32-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x08] +; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x01] +; X32-AVX1-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x02] +; X32-AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03] +; X32-AVX1-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04] +; X32-AVX1-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05] +; X32-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] +; X32-AVX1-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_setr_epi16: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %r10d # encoding: [0x67,0x44,0x0f,0xb7,0x54,0x24,0x10] +; X32-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x0f,0xb7,0x44,0x24,0x08] +; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX512-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x01] +; X32-AVX512-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x02] +; X32-AVX512-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x03] +; X32-AVX512-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04] +; X32-AVX512-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05] +; X32-AVX512-NEXT: 
vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] +; X32-AVX512-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2 @@ -4538,6 +5448,36 @@ ; X64-AVX512-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] ; X64-AVX512-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_setr_epi32: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] +; X32-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca] +; X32-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8] +; X32-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X32-SSE-NEXT: movd %esi, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd6] +; X32-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_setr_epi32: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX1-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01] +; X32-AVX1-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] +; X32-AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_setr_epi32: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] +; X32-AVX512-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01] +; X32-AVX512-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] +; X32-AVX512-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1 %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2 @@ -4608,6 +5548,30 @@ ; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] ; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_setr_epi64x: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movq %rsi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xce] +; X32-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7] +; X32-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_setr_epi64x: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovq %rsi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc6] +; X32-AVX1-NEXT: vmovq %rdi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xcf] +; X32-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0] +; 
X32-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_setr_epi64x: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovq %rsi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc6] +; X32-AVX512-NEXT: vmovq %rdi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xcf] +; X32-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1 ret <2 x i64> %res1 @@ -4661,6 +5625,24 @@ ; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] ; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_setr_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] +; X32-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_setr_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] +; X32-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_setr_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] +; X32-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a0, i32 0 %res1 = insertelement <2 x double> %res0, double %a1, i32 1 ret <2 x double> %res1 @@ -5053,6 +6035,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_sqrt_sd_scalar: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_sqrt_sd_scalar: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_sqrt_sd_scalar: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %sqrt = call double @llvm.sqrt.f64(double %a0) ret double %sqrt } @@ -5334,6 +6331,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_store_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_store_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_store_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* store <2 x double> %a1, <2 x double>* %arg0, align 16 ret void @@ -5384,6 +6396,27 @@ ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) 
# EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_store_pd1: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0] +; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_store_pd1: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_store_pd1: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double * %a0 to <2 x double>* %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer store <2 x double> %shuf, <2 x double>* %arg0, align 16 @@ -5423,6 +6456,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_store_sd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movsd %xmm0, (%edi) # encoding: [0x67,0xf2,0x0f,0x11,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_store_sd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovsd %xmm0, (%edi) # encoding: [0x67,0xc5,0xfb,0x11,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_store_sd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovsd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x11,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ext = extractelement <2 x double> %a1, i32 0 store double %ext, double* %a0, align 1 ret void @@ -5461,6 +6509,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_store_si128: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_store_si128: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_store_si128: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] store <2 x i64> %a1, <2 x i64>* %a0, align 16 ret void } @@ -5510,6 +6573,27 @@ ; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] ; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_store1_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[0,0] +; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_store1_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX1-NEXT: # xmm0 = xmm0[0,0] +; 
X32-AVX1-NEXT: vmovaps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_store1_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X32-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X32-AVX512-NEXT: vmovaps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x29,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double * %a0 to <2 x double>* %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer store <2 x double> %shuf, <2 x double>* %arg0, align 16 @@ -5561,6 +6645,27 @@ ; X64-AVX512-NEXT: # xmm0 = xmm0[1,0] ; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storeh_sd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0] +; X32-SSE-NEXT: # xmm0 = xmm0[1,1] +; X32-SSE-NEXT: movsd %xmm0, (%edi) # encoding: [0x67,0xf2,0x0f,0x11,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storeh_sd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X32-AVX1-NEXT: # xmm0 = xmm0[1,0] +; X32-AVX1-NEXT: vmovsd %xmm0, (%edi) # encoding: [0x67,0xc5,0xfb,0x11,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storeh_sd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X32-AVX512-NEXT: # xmm0 = xmm0[1,0] +; X32-AVX512-NEXT: vmovsd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x11,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ext = extractelement <2 x double> %a1, i32 1 store double %ext, double* %a0, align 8 ret void @@ -5602,6 +6707,24 @@ ; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] ; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storel_epi64: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0] +; X32-SSE-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storel_epi64: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] +; X32-AVX1-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storel_epi64: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] +; X32-AVX512-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ext = extractelement <2 x i64> %a1, i32 0 %bc = bitcast <2 x i64> *%a0 to i64* store i64 %ext, i64* %bc, align 8 @@ -5641,6 +6764,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storel_sd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movsd %xmm0, (%edi) # encoding: [0x67,0xf2,0x0f,0x11,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storel_sd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovsd %xmm0, (%edi) # encoding: 
[0x67,0xc5,0xfb,0x11,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storel_sd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovsd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xfb,0x11,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %ext = extractelement <2 x double> %a1, i32 0 store double %ext, double* %a0, align 8 ret void @@ -5691,6 +6829,27 @@ ; X64-AVX512-NEXT: # xmm0 = xmm0[1,0] ; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storer_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e] +; X32-SSE-NEXT: # xmm0 = xmm0[2,3,0,1] +; X32-SSE-NEXT: movaps %xmm0, (%edi) # encoding: [0x67,0x0f,0x29,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storer_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X32-AVX1-NEXT: # xmm0 = xmm0[1,0] +; X32-AVX1-NEXT: vmovapd %xmm0, (%edi) # encoding: [0x67,0xc5,0xf9,0x29,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storer_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X32-AVX512-NEXT: # xmm0 = xmm0[1,0] +; X32-AVX512-NEXT: vmovapd %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf9,0x29,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> store <2 x double> %shuf, <2 x double>* %arg0, align 16 @@ -5730,6 +6889,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storeu_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movups %xmm0, (%edi) # encoding: [0x67,0x0f,0x11,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storeu_pd: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovups %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x11,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storeu_pd: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovups %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x11,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* store <2 x double> %a1, <2 x double>* %arg0, align 1 ret void @@ -5768,6 +6942,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storeu_si128: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movups %xmm0, (%edi) # encoding: [0x67,0x0f,0x11,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storeu_si128: +; X32-AVX1: # %bb.0: +; X32-AVX1-NEXT: vmovups %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x11,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storeu_si128: +; X32-AVX512: # %bb.0: +; X32-AVX512-NEXT: vmovups %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x11,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] store <2 x i64> %a1, <2 x i64>* %a0, align 1 ret void } @@ -5808,6 +6997,24 @@ ; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] ; 
X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storeu_si64: +; X32-SSE: # %bb.0: # %entry +; X32-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0] +; X32-SSE-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storeu_si64: +; X32-AVX1: # %bb.0: # %entry +; X32-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] +; X32-AVX1-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storeu_si64: +; X32-AVX512: # %bb.0: # %entry +; X32-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] +; X32-AVX512-NEXT: movq %rax, (%edi) # encoding: [0x67,0x48,0x89,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] entry: %vecext.i = extractelement <2 x i64> %B, i32 0 %__v.i = bitcast i8* %A to i64* @@ -5854,6 +7061,24 @@ ; X64-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0] ; X64-AVX512-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storeu_si32: +; X32-SSE: # %bb.0: # %entry +; X32-SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0] +; X32-SSE-NEXT: movl %eax, (%edi) # encoding: [0x67,0x89,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storeu_si32: +; X32-AVX1: # %bb.0: # %entry +; X32-AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0] +; X32-AVX1-NEXT: movl %eax, (%edi) # encoding: [0x67,0x89,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storeu_si32: +; X32-AVX512: # %bb.0: # %entry +; X32-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0] +; X32-AVX512-NEXT: movl %eax, (%edi) # encoding: [0x67,0x89,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] entry: %0 = bitcast <2 x i64> %B to <4 x i32> %vecext.i = extractelement <4 x i32> %0, i32 0 @@ -5901,6 +7126,24 @@ ; X64-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0] ; X64-AVX512-NEXT: movw %ax, (%rdi) # encoding: [0x66,0x89,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_storeu_si16: +; X32-SSE: # %bb.0: # %entry +; X32-SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0] +; X32-SSE-NEXT: movw %ax, (%edi) # encoding: [0x67,0x66,0x89,0x07] +; X32-SSE-NEXT: retq # encoding: [0xc3] +; +; X32-AVX1-LABEL: test_mm_storeu_si16: +; X32-AVX1: # %bb.0: # %entry +; X32-AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0] +; X32-AVX1-NEXT: movw %ax, (%edi) # encoding: [0x67,0x66,0x89,0x07] +; X32-AVX1-NEXT: retq # encoding: [0xc3] +; +; X32-AVX512-LABEL: test_mm_storeu_si16: +; X32-AVX512: # %bb.0: # %entry +; X32-AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0] +; X32-AVX512-NEXT: movw %ax, (%edi) # encoding: [0x67,0x66,0x89,0x07] +; X32-AVX512-NEXT: retq # encoding: [0xc3] entry: %0 = bitcast <2 x i64> %B to <8 x i16> %vecext.i = extractelement <8 x i16> %0, i32 0 @@ -5942,6 +7185,21 @@ ; X64-AVX512: # %bb.0: ; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] ; X64-AVX512-NEXT: retq # encoding: [0xc3] +; +; X32-SSE-LABEL: test_mm_stream_pd: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: movntps %xmm0, (%edi) # encoding: [0x67,0x0f,0x2b,0x07] +; X32-SSE-NEXT: 
retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_stream_pd:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovntps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x2b,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_stream_pd:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovntps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x2b,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
 %arg0 = bitcast double* %a0 to <2 x double>*
 store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
 ret void
@@ -5959,6 +7217,11 @@
 ; X64: # %bb.0:
 ; X64-NEXT: movntil %esi, (%rdi) # encoding: [0x0f,0xc3,0x37]
 ; X64-NEXT: retq # encoding: [0xc3]
+;
+; X32-LABEL: test_mm_stream_si32:
+; X32: # %bb.0:
+; X32-NEXT: movntil %esi, (%edi) # encoding: [0x67,0x0f,0xc3,0x37]
+; X32-NEXT: retq # encoding: [0xc3]
 store i32 %a1, i32* %a0, align 1, !nontemporal !0
 ret void
 }
@@ -5996,6 +7259,21 @@
 ; X64-AVX512: # %bb.0:
 ; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
 ; X64-AVX512-NEXT: retq # encoding: [0xc3]
+;
+; X32-SSE-LABEL: test_mm_stream_si128:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: movntps %xmm0, (%edi) # encoding: [0x67,0x0f,0x2b,0x07]
+; X32-SSE-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX1-LABEL: test_mm_stream_si128:
+; X32-AVX1: # %bb.0:
+; X32-AVX1-NEXT: vmovntps %xmm0, (%edi) # encoding: [0x67,0xc5,0xf8,0x2b,0x07]
+; X32-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X32-AVX512-LABEL: test_mm_stream_si128:
+; X32-AVX512: # %bb.0:
+; X32-AVX512-NEXT: vmovntps %xmm0, (%edi) # EVEX TO VEX Compression encoding: [0x67,0xc5,0xf8,0x2b,0x07]
+; X32-AVX512-NEXT: retq # encoding: [0xc3]
 store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
 ret void
 }
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -156,7 +156,10 @@
   case IC_VEX_OPSIZE:
     return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE)) ||
            (VEX_WIG && inheritsFrom(child, IC_VEX_W_OPSIZE)) ||
-           (VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE));
+           (VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE)) ||
+           inheritsFrom(child, IC_VEX_OPSIZE_ADSIZE);
+  case IC_VEX_OPSIZE_ADSIZE:
+    return false;
   case IC_VEX_W:
     return VEX_LIG && inheritsFrom(child, IC_VEX_L_W);
   case IC_VEX_W_XS:
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -268,7 +268,9 @@
     else if (OpPrefix == X86Local::PD && HasVEX_W)
       insnContext = IC_VEX_W_OPSIZE;
     else if (OpPrefix == X86Local::PD)
-      insnContext = IC_VEX_OPSIZE;
+      insnContext = Is64Bit && AdSize == X86Local::AdSize32
+                        ? IC_VEX_OPSIZE_ADSIZE
+                        : IC_VEX_OPSIZE;
     else if (HasVEX_LPrefix && OpPrefix == X86Local::XS)
       insnContext = IC_VEX_L_XS;
    else if (HasVEX_LPrefix && OpPrefix == X86Local::XD)