Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -1406,6 +1406,78 @@ [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpermil_pd_128 : + GCCBuiltin<"__builtin_ia32_vpermilpd_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_pd_256 : + GCCBuiltin<"__builtin_ia32_vpermilpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_pd_512 : + GCCBuiltin<"__builtin_ia32_vpermilpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_ps_128 : + GCCBuiltin<"__builtin_ia32_vpermilps_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_ps_256 : + GCCBuiltin<"__builtin_ia32_vpermilps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_ps_512 : + GCCBuiltin<"__builtin_ia32_vpermilps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_pd_256 : + GCCBuiltin<"__builtin_ia32_vpermilvarpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_pd_512 : + GCCBuiltin<"__builtin_ia32_vpermilvarpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_pd_128 : + GCCBuiltin<"__builtin_ia32_vpermilvarpd_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_ps_256 : + GCCBuiltin<"__builtin_ia32_vpermilvarps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_ps_512 : + GCCBuiltin<"__builtin_ia32_vpermilvarps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_ps_128 : + GCCBuiltin<"__builtin_ia32_vpermilvarps_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128_mask">, Intrinsic<[llvm_v16i8_ty], Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -15829,11 +15829,16 @@ RoundingMode, Sae), Mask, Src0, Subtarget, DAG); } - case INTR_TYPE_2OP_MASK: { + case INTR_TYPE_2OP_MASK: + case INTR_TYPE_2OP_IMM8_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue PassThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); + + if (IntrData->Type == INTR_TYPE_2OP_IMM8_MASK) + Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2); + // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -1011,62 +1011,6 @@ } //===----------------------------------------------------------------------===// -// AVX-512 - VPERM -// -// -- immediate form -- -multiclass avx512_perm_imm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in { - def ri : AVX512AIi8, - EVEX; - def mi : AVX512AIi8, - EVEX, EVEX_CD8<_.EltSize, CD8VF>; -} -} - -multiclass avx512_permil OpcImm, bits<8> OpcVar, X86VectorVTInfo _, - X86VectorVTInfo Ctrl> : - avx512_perm_imm { - let ExeDomain = _.ExeDomain in { - def rr : AVX5128I, - EVEX_4V; - def rm : AVX5128I, - EVEX_4V; - } -} -defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>, - EVEX_V512; -defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>, - EVEX_V512, VEX_W; - -def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), - (VPERMILPSZri VR512:$src1, imm:$imm)>; -def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), - (VPERMILPDZri VR512:$src1, imm:$imm)>; - // -- VPERM2I - 3 source operands form -- multiclass avx512_perm_3src opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { @@ -3900,7 +3844,72 @@ defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", X86VPermi, avx512vl_f64_info>, EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; +//===----------------------------------------------------------------------===// +// AVX-512 - VPERMIL +//===----------------------------------------------------------------------===// + +multiclass avx512_permil_vec OpcVar, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, X86VectorVTInfo Ctrl> { + defm rr: AVX512_maskable, + T8PD, EVEX_4V; + let mayLoad = 1 in { + defm rm: AVX512_maskable, + T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + defm rmb: AVX512_maskable, + T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; + }//let mayLoad = 1 +} + +multiclass avx512_permil_vec_common OpcVar, + AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_permil_vec, EVEX_V512; + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_permil_vec, EVEX_V128; + defm Z256 : avx512_permil_vec, EVEX_V256; + } +} + +multiclass avx512_permil OpcImm, bits<8> OpcVar, + AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ + + defm NAME: avx512_permil_vec_common; + defm NAME: avx512_shift_rmi_sizes, + EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; + + let isCodeGenOnly = 1 in { + defm NAME#_I: avx512_permil_vec_common; + defm NAME#_I: avx512_shift_rmi_sizes, + EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; + } +} +defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info, + avx512vl_i32_info>; +defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, + avx512vl_i64_info>, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW //===----------------------------------------------------------------------===// Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -8087,17 +8087,19 @@ (bitconvert (i_frag addr:$src2))))]>, VEX_4V, Sched<[WriteFShuffleLd, ReadAfterLd]>; - def ri : AVXAIi8, VEX, Sched<[WriteFShuffle]>; - def mi : AVXAIi8, VEX, Sched<[WriteFShuffleLd]>; + }// Predicates = [HasAVX, NoVLX] } let ExeDomain = SSEPackedSingle in { @@ -8113,7 +8115,7 @@ loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))), (VPERMILPSYrr VR256:$src1, VR256:$src2)>; def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))), Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -22,7 +22,7 @@ INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP, CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, - INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, + INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK, INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK, VPERM_3OP_MASKZ, @@ -1341,6 +1341,30 @@ X86ISD::VPERMIV3, 0), X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK, X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_128, INTR_TYPE_2OP_IMM8_MASK, + X86ISD::VPERMILPI, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_256, INTR_TYPE_2OP_IMM8_MASK, + X86ISD::VPERMILPI, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_512, INTR_TYPE_2OP_IMM8_MASK, + X86ISD::VPERMILPI, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_128, INTR_TYPE_2OP_IMM8_MASK, + X86ISD::VPERMILPI, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_256, INTR_TYPE_2OP_IMM8_MASK, + X86ISD::VPERMILPI, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_512, INTR_TYPE_2OP_IMM8_MASK, + X86ISD::VPERMILPI, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_128, INTR_TYPE_2OP_MASK, + X86ISD::VPERMILPV, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_256, INTR_TYPE_2OP_MASK, + X86ISD::VPERMILPV, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_512, INTR_TYPE_2OP_MASK, + X86ISD::VPERMILPV, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_128, INTR_TYPE_2OP_MASK, + X86ISD::VPERMILPV, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_256, INTR_TYPE_2OP_MASK, + X86ISD::VPERMILPV, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_512, INTR_TYPE_2OP_MASK, + X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK, X86ISD::VPERMV3, 0), X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK, Index: test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -4080,3 +4080,84 @@ ret <16 x float> %res2 } +declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm2 {%k1} {z} +; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm0 +; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1 +; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3) + %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3) + %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1) + %res3 = fadd <8 x double> %res, %res1 + %res4 = fadd <8 x double> %res3, %res2 + ret <8 x double> %res4 +} + +declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16) + +define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpermilps $22, %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vpermilps $22, %zmm0, %zmm2 {%k1} {z} +; CHECK-NEXT: vpermilps $22, %zmm0, %zmm0 +; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1 +; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3) + %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3) + %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1) + %res3 = fadd <16 x float> %res, %res1 + %res4 = fadd <16 x float> %res3, %res2 + ret <16 x float> %res4 +} + +declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1 +; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) + %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3) + %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) + %res3 = fadd <8 x double> %res, %res1 + %res4 = fadd <8 x double> %res2, %res3 + ret <8 x double> %res4 +} + +declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16) + +define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1 +; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) + %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3) + %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) + %res3 = fadd <16 x float> %res, %res1 + %res4 = fadd <16 x float> %res2, %res3 + ret <16 x float> %res4 +} Index: test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics.ll +++ test/CodeGen/X86/avx512vl-intrinsics.ll @@ -4724,3 +4724,172 @@ ret <4 x i64> %res2 } +declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0 +; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 +; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3) + %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1) + %res3 = fadd <4 x double> %res, %res1 + %res4 = fadd <4 x double> %res2, %res3 + ret <4 x double> %res4 +} + +declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0 +; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3) + %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1) + %res3 = fadd <2 x double> %res, %res1 + %res4 = fadd <2 x double> %res3, %res2 + ret <2 x double> %res4 +} + +declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 +; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3) + %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1) + %res3 = fadd <8 x float> %res, %res1 + %res4 = fadd <8 x float> %res3, %res2 + ret <8 x float> %res4 +} + +declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 +; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3) + %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1) + %res3 = fadd <4 x float> %res, %res1 + %res4 = fadd <4 x float> %res2, %res3 + ret <4 x float> %res4 +} + +declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 {%k1} {z} +; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1 +; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3) + %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) + %res3 = fadd <4 x double> %res, %res1 + %res4 = fadd <4 x double> %res2, %res3 + ret <4 x double> %res4 +} + +declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 {%k1} {z} +; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm1 +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3) + %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1) + %res3 = fadd <2 x double> %res, %res1 + %res4 = fadd <2 x double> %res3, %res2 + ret <2 x double> %res4 +} + +declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 {%k1} {z} +; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1 +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) + %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) + %res3 = fadd <8 x float> %res, %res1 + %res4 = fadd <8 x float> %res3, %res2 + ret <8 x float> %res4 +} + +declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 {%k1} {z} +; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1 +; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3) + %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1) + %res3 = fadd <4 x float> %res, %res1 + %res4 = fadd <4 x float> %res2, %res3 + ret <4 x float> %res4 +} + + Index: test/CodeGen/X86/vector-shuffle-128-v2.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-128-v2.ll +++ test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -4,6 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" @@ -135,6 +136,11 @@ ; AVX: # BB#0: ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX-NEXT: retq + +; AVX512VL-LABEL: shuffle_v2f64_10: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpermilpd $1, %xmm0, %xmm0 +; AVX512VL-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle } @@ -191,6 +197,11 @@ ; AVX: # BB#0: ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0] ; AVX-NEXT: retq + +; AVX512VL-LABEL: shuffle_v2f64_32: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpermilpd $1, %xmm1, %xmm0 +; AVX512VL-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> ret <2 x double> %shuffle } @@ -1167,6 +1178,11 @@ ; AVX: # BB#0: ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0] ; AVX-NEXT: retq + +; AVX512VL-LABEL: shuffle_mem_v2f64_10: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpermilpd $1, (%rdi), %xmm0 +; AVX512VL-NEXT: retq %a = load <2 x double>, <2 x double>* %ptr %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> ret <2 x double> %shuffle Index: test/CodeGen/X86/vector-shuffle-256-v4.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-256-v4.ll +++ test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 +; RUN: llc < %s -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VL target triple = "x86_64-unknown-unknown" @@ -133,6 +134,11 @@ ; ALL: # BB#0: ; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3] ; ALL-NEXT: retq + +; AVX512VL-LABEL: shuffle_v4f64_0023: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpermilpd $8, %ymm0, %ymm0 +; AVX512VL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ret <4 x double> %shuffle } Index: test/MC/X86/avx512-encodings.s =================================================================== --- test/MC/X86/avx512-encodings.s +++ test/MC/X86/avx512-encodings.s @@ -15378,3 +15378,235 @@ // CHECK: encoding: [0x62,0xa2,0xfd,0x41,0xa2,0x94,0x81,0x00,0x04,0x00,0x00] vscatterdpd %zmm18, 1024(%rcx, %ymm24,4) {%k1} +// CHECK: vpermilps $171, %zmm22, %zmm2 +// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x04,0xd6,0xab] + vpermilps $0xab, %zmm22, %zmm2 + +// CHECK: vpermilps $171, %zmm22, %zmm2 {%k2} +// CHECK: encoding: [0x62,0xb3,0x7d,0x4a,0x04,0xd6,0xab] + vpermilps $0xab, %zmm22, %zmm2 {%k2} + +// CHECK: vpermilps $171, %zmm22, %zmm2 {%k2} {z} +// CHECK: encoding: [0x62,0xb3,0x7d,0xca,0x04,0xd6,0xab] + vpermilps $0xab, %zmm22, %zmm2 {%k2} {z} + +// CHECK: vpermilps $123, %zmm22, %zmm2 +// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x04,0xd6,0x7b] + vpermilps $0x7b, %zmm22, %zmm2 + +// CHECK: vpermilps $123, (%rcx), %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x11,0x7b] + vpermilps $0x7b, (%rcx), %zmm2 + +// CHECK: vpermilps $123, 291(%rax,%r14,8), %zmm2 +// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x04,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpermilps $0x7b, 291(%rax,%r14,8), %zmm2 + +// CHECK: vpermilps $123, (%rcx){1to16}, %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x11,0x7b] + vpermilps $0x7b, (%rcx){1to16}, %zmm2 + +// CHECK: vpermilps $123, 8128(%rdx), %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x52,0x7f,0x7b] + vpermilps $0x7b, 8128(%rdx), %zmm2 + +// CHECK: vpermilps $123, 8192(%rdx), %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x92,0x00,0x20,0x00,0x00,0x7b] + vpermilps $0x7b, 8192(%rdx), %zmm2 + +// CHECK: vpermilps $123, -8192(%rdx), %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x52,0x80,0x7b] + vpermilps $0x7b, -8192(%rdx), %zmm2 + +// CHECK: vpermilps $123, -8256(%rdx), %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x92,0xc0,0xdf,0xff,0xff,0x7b] + vpermilps $0x7b, -8256(%rdx), %zmm2 + +// CHECK: vpermilps $123, 508(%rdx){1to16}, %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x52,0x7f,0x7b] + vpermilps $0x7b, 508(%rdx){1to16}, %zmm2 + +// CHECK: vpermilps $123, 512(%rdx){1to16}, %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x92,0x00,0x02,0x00,0x00,0x7b] + vpermilps $0x7b, 512(%rdx){1to16}, %zmm2 + +// CHECK: vpermilps $123, -512(%rdx){1to16}, %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x52,0x80,0x7b] + vpermilps $0x7b, -512(%rdx){1to16}, %zmm2 + +// CHECK: vpermilps $123, -516(%rdx){1to16}, %zmm2 +// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x92,0xfc,0xfd,0xff,0xff,0x7b] + vpermilps $0x7b, -516(%rdx){1to16}, %zmm2 + +// CHECK: vpermilps %zmm2, %zmm20, %zmm13 +// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0xea] + vpermilps %zmm2, %zmm20, %zmm13 + +// CHECK: vpermilps %zmm2, %zmm20, %zmm13 {%k1} +// CHECK: encoding: [0x62,0x72,0x5d,0x41,0x0c,0xea] + vpermilps %zmm2, %zmm20, %zmm13 {%k1} + +// CHECK: vpermilps %zmm2, %zmm20, %zmm13 {%k1} {z} +// CHECK: encoding: [0x62,0x72,0x5d,0xc1,0x0c,0xea] + vpermilps %zmm2, %zmm20, %zmm13 {%k1} {z} + +// CHECK: vpermilps (%rcx), %zmm20, %zmm13 +// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0x29] + vpermilps (%rcx), %zmm20, %zmm13 + +// CHECK: vpermilps 291(%rax,%r14,8), %zmm20, %zmm13 +// CHECK: encoding: [0x62,0x32,0x5d,0x40,0x0c,0xac,0xf0,0x23,0x01,0x00,0x00] + vpermilps 291(%rax,%r14,8), %zmm20, %zmm13 + +// CHECK: vpermilps (%rcx){1to16}, %zmm20, %zmm13 +// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0x29] + vpermilps (%rcx){1to16}, %zmm20, %zmm13 + +// CHECK: vpermilps 8128(%rdx), %zmm20, %zmm13 +// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0x6a,0x7f] + vpermilps 8128(%rdx), %zmm20, %zmm13 + +// CHECK: vpermilps 8192(%rdx), %zmm20, %zmm13 +// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0xaa,0x00,0x20,0x00,0x00] + vpermilps 8192(%rdx), %zmm20, %zmm13 + +// CHECK: vpermilps -8192(%rdx), %zmm20, %zmm13 +// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0x6a,0x80] + vpermilps -8192(%rdx), %zmm20, %zmm13 + +// CHECK: vpermilps -8256(%rdx), %zmm20, %zmm13 +// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0xaa,0xc0,0xdf,0xff,0xff] + vpermilps -8256(%rdx), %zmm20, %zmm13 + +// CHECK: vpermilps 508(%rdx){1to16}, %zmm20, %zmm13 +// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0x6a,0x7f] + vpermilps 508(%rdx){1to16}, %zmm20, %zmm13 + +// CHECK: vpermilps 512(%rdx){1to16}, %zmm20, %zmm13 +// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0xaa,0x00,0x02,0x00,0x00] + vpermilps 512(%rdx){1to16}, %zmm20, %zmm13 + +// CHECK: vpermilps -512(%rdx){1to16}, %zmm20, %zmm13 +// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0x6a,0x80] + vpermilps -512(%rdx){1to16}, %zmm20, %zmm13 + +// CHECK: vpermilps -516(%rdx){1to16}, %zmm20, %zmm13 +// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0xaa,0xfc,0xfd,0xff,0xff] + vpermilps -516(%rdx){1to16}, %zmm20, %zmm13 + +// CHECK: vpermilpd $171, %zmm4, %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0xdc,0xab] + vpermilpd $0xab, %zmm4, %zmm19 + +// CHECK: vpermilpd $171, %zmm4, %zmm19 {%k1} +// CHECK: encoding: [0x62,0xe3,0xfd,0x49,0x05,0xdc,0xab] + vpermilpd $0xab, %zmm4, %zmm19 {%k1} + +// CHECK: vpermilpd $171, %zmm4, %zmm19 {%k1} {z} +// CHECK: encoding: [0x62,0xe3,0xfd,0xc9,0x05,0xdc,0xab] + vpermilpd $0xab, %zmm4, %zmm19 {%k1} {z} + +// CHECK: vpermilpd $123, %zmm4, %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0xdc,0x7b] + vpermilpd $0x7b, %zmm4, %zmm19 + +// CHECK: vpermilpd $123, (%rcx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x19,0x7b] + vpermilpd $0x7b, (%rcx), %zmm19 + +// CHECK: vpermilpd $123, 291(%rax,%r14,8), %zmm19 +// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x05,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpermilpd $0x7b, 291(%rax,%r14,8), %zmm19 + +// CHECK: vpermilpd $123, (%rcx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x19,0x7b] + vpermilpd $0x7b, (%rcx){1to8}, %zmm19 + +// CHECK: vpermilpd $123, 8128(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x5a,0x7f,0x7b] + vpermilpd $0x7b, 8128(%rdx), %zmm19 + +// CHECK: vpermilpd $123, 8192(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x9a,0x00,0x20,0x00,0x00,0x7b] + vpermilpd $0x7b, 8192(%rdx), %zmm19 + +// CHECK: vpermilpd $123, -8192(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x5a,0x80,0x7b] + vpermilpd $0x7b, -8192(%rdx), %zmm19 + +// CHECK: vpermilpd $123, -8256(%rdx), %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x9a,0xc0,0xdf,0xff,0xff,0x7b] + vpermilpd $0x7b, -8256(%rdx), %zmm19 + +// CHECK: vpermilpd $123, 1016(%rdx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x5a,0x7f,0x7b] + vpermilpd $0x7b, 1016(%rdx){1to8}, %zmm19 + +// CHECK: vpermilpd $123, 1024(%rdx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x9a,0x00,0x04,0x00,0x00,0x7b] + vpermilpd $0x7b, 1024(%rdx){1to8}, %zmm19 + +// CHECK: vpermilpd $123, -1024(%rdx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x5a,0x80,0x7b] + vpermilpd $0x7b, -1024(%rdx){1to8}, %zmm19 + +// CHECK: vpermilpd $123, -1032(%rdx){1to8}, %zmm19 +// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x9a,0xf8,0xfb,0xff,0xff,0x7b] + vpermilpd $0x7b, -1032(%rdx){1to8}, %zmm19 + +// CHECK: vpermilpd %zmm21, %zmm26, %zmm1 +// CHECK: encoding: [0x62,0xb2,0xad,0x40,0x0d,0xcd] + vpermilpd %zmm21, %zmm26, %zmm1 + +// CHECK: vpermilpd %zmm21, %zmm26, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xb2,0xad,0x42,0x0d,0xcd] + vpermilpd %zmm21, %zmm26, %zmm1 {%k2} + +// CHECK: vpermilpd %zmm21, %zmm26, %zmm1 {%k2} {z} +// CHECK: encoding: [0x62,0xb2,0xad,0xc2,0x0d,0xcd] + vpermilpd %zmm21, %zmm26, %zmm1 {%k2} {z} + +// CHECK: vpermilpd (%rcx), %zmm26, %zmm1 +// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x09] + vpermilpd (%rcx), %zmm26, %zmm1 + +// CHECK: vpermilpd 291(%rax,%r14,8), %zmm26, %zmm1 +// CHECK: encoding: [0x62,0xb2,0xad,0x40,0x0d,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpermilpd 291(%rax,%r14,8), %zmm26, %zmm1 + +// CHECK: vpermilpd (%rcx){1to8}, %zmm26, %zmm1 +// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x09] + vpermilpd (%rcx){1to8}, %zmm26, %zmm1 + +// CHECK: vpermilpd 8128(%rdx), %zmm26, %zmm1 +// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x4a,0x7f] + vpermilpd 8128(%rdx), %zmm26, %zmm1 + +// CHECK: vpermilpd 8192(%rdx), %zmm26, %zmm1 +// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x8a,0x00,0x20,0x00,0x00] + vpermilpd 8192(%rdx), %zmm26, %zmm1 + +// CHECK: vpermilpd -8192(%rdx), %zmm26, %zmm1 +// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x4a,0x80] + vpermilpd -8192(%rdx), %zmm26, %zmm1 + +// CHECK: vpermilpd -8256(%rdx), %zmm26, %zmm1 +// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x8a,0xc0,0xdf,0xff,0xff] + vpermilpd -8256(%rdx), %zmm26, %zmm1 + +// CHECK: vpermilpd 1016(%rdx){1to8}, %zmm26, %zmm1 +// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x4a,0x7f] + vpermilpd 1016(%rdx){1to8}, %zmm26, %zmm1 + +// CHECK: vpermilpd 1024(%rdx){1to8}, %zmm26, %zmm1 +// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x8a,0x00,0x04,0x00,0x00] + vpermilpd 1024(%rdx){1to8}, %zmm26, %zmm1 + +// CHECK: vpermilpd -1024(%rdx){1to8}, %zmm26, %zmm1 +// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x4a,0x80] + vpermilpd -1024(%rdx){1to8}, %zmm26, %zmm1 + +// CHECK: vpermilpd -1032(%rdx){1to8}, %zmm26, %zmm1 +// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x8a,0xf8,0xfb,0xff,0xff] + vpermilpd -1032(%rdx){1to8}, %zmm26, %zmm1 + Index: test/MC/X86/x86-64-avx512f_vl.s =================================================================== --- test/MC/X86/x86-64-avx512f_vl.s +++ test/MC/X86/x86-64-avx512f_vl.s @@ -20347,3 +20347,467 @@ // CHECK: encoding: [0x62,0x22,0xfd,0x21,0xa2,0xb4,0xb9,0x00,0x04,0x00,0x00] vscatterdpd %ymm30, 1024(%rcx, %xmm31,4) {%k1} +// CHECK: vpermilps $171, %xmm28, %xmm20 +// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x04,0xe4,0xab] + vpermilps $0xab, %xmm28, %xmm20 + +// CHECK: vpermilps $171, %xmm28, %xmm20 {%k4} +// CHECK: encoding: [0x62,0x83,0x7d,0x0c,0x04,0xe4,0xab] + vpermilps $0xab, %xmm28, %xmm20 {%k4} + +// CHECK: vpermilps $171, %xmm28, %xmm20 {%k4} {z} +// CHECK: encoding: [0x62,0x83,0x7d,0x8c,0x04,0xe4,0xab] + vpermilps $0xab, %xmm28, %xmm20 {%k4} {z} + +// CHECK: vpermilps $123, %xmm28, %xmm20 +// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x04,0xe4,0x7b] + vpermilps $0x7b, %xmm28, %xmm20 + +// CHECK: vpermilps $123, (%rcx), %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0x21,0x7b] + vpermilps $0x7b, (%rcx), %xmm20 + +// CHECK: vpermilps $123, 291(%rax,%r14,8), %xmm20 +// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x04,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpermilps $0x7b, 291(%rax,%r14,8), %xmm20 + +// CHECK: vpermilps $123, (%rcx){1to4}, %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0x21,0x7b] + vpermilps $0x7b, (%rcx){1to4}, %xmm20 + +// CHECK: vpermilps $123, 2032(%rdx), %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0x62,0x7f,0x7b] + vpermilps $0x7b, 2032(%rdx), %xmm20 + +// CHECK: vpermilps $123, 2048(%rdx), %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0xa2,0x00,0x08,0x00,0x00,0x7b] + vpermilps $0x7b, 2048(%rdx), %xmm20 + +// CHECK: vpermilps $123, -2048(%rdx), %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0x62,0x80,0x7b] + vpermilps $0x7b, -2048(%rdx), %xmm20 + +// CHECK: vpermilps $123, -2064(%rdx), %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0xa2,0xf0,0xf7,0xff,0xff,0x7b] + vpermilps $0x7b, -2064(%rdx), %xmm20 + +// CHECK: vpermilps $123, 508(%rdx){1to4}, %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0x62,0x7f,0x7b] + vpermilps $0x7b, 508(%rdx){1to4}, %xmm20 + +// CHECK: vpermilps $123, 512(%rdx){1to4}, %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0xa2,0x00,0x02,0x00,0x00,0x7b] + vpermilps $0x7b, 512(%rdx){1to4}, %xmm20 + +// CHECK: vpermilps $123, -512(%rdx){1to4}, %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0x62,0x80,0x7b] + vpermilps $0x7b, -512(%rdx){1to4}, %xmm20 + +// CHECK: vpermilps $123, -516(%rdx){1to4}, %xmm20 +// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0xa2,0xfc,0xfd,0xff,0xff,0x7b] + vpermilps $0x7b, -516(%rdx){1to4}, %xmm20 + +// CHECK: vpermilps $171, %ymm17, %ymm30 +// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x04,0xf1,0xab] + vpermilps $0xab, %ymm17, %ymm30 + +// CHECK: vpermilps $171, %ymm17, %ymm30 {%k5} +// CHECK: encoding: [0x62,0x23,0x7d,0x2d,0x04,0xf1,0xab] + vpermilps $0xab, %ymm17, %ymm30 {%k5} + +// CHECK: vpermilps $171, %ymm17, %ymm30 {%k5} {z} +// CHECK: encoding: [0x62,0x23,0x7d,0xad,0x04,0xf1,0xab] + vpermilps $0xab, %ymm17, %ymm30 {%k5} {z} + +// CHECK: vpermilps $123, %ymm17, %ymm30 +// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x04,0xf1,0x7b] + vpermilps $0x7b, %ymm17, %ymm30 + +// CHECK: vpermilps $123, (%rcx), %ymm30 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0x31,0x7b] + vpermilps $0x7b, (%rcx), %ymm30 + +// CHECK: vpermilps $123, 291(%rax,%r14,8), %ymm30 +// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x04,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpermilps $0x7b, 291(%rax,%r14,8), %ymm30 + +// CHECK: vpermilps $123, (%rcx){1to8}, %ymm30 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0x31,0x7b] + vpermilps $0x7b, (%rcx){1to8}, %ymm30 + +// CHECK: vpermilps $123, 4064(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0x72,0x7f,0x7b] + vpermilps $0x7b, 4064(%rdx), %ymm30 + +// CHECK: vpermilps $123, 4096(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0xb2,0x00,0x10,0x00,0x00,0x7b] + vpermilps $0x7b, 4096(%rdx), %ymm30 + +// CHECK: vpermilps $123, -4096(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0x72,0x80,0x7b] + vpermilps $0x7b, -4096(%rdx), %ymm30 + +// CHECK: vpermilps $123, -4128(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0xb2,0xe0,0xef,0xff,0xff,0x7b] + vpermilps $0x7b, -4128(%rdx), %ymm30 + +// CHECK: vpermilps $123, 508(%rdx){1to8}, %ymm30 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0x72,0x7f,0x7b] + vpermilps $0x7b, 508(%rdx){1to8}, %ymm30 + +// CHECK: vpermilps $123, 512(%rdx){1to8}, %ymm30 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0xb2,0x00,0x02,0x00,0x00,0x7b] + vpermilps $0x7b, 512(%rdx){1to8}, %ymm30 + +// CHECK: vpermilps $123, -512(%rdx){1to8}, %ymm30 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0x72,0x80,0x7b] + vpermilps $0x7b, -512(%rdx){1to8}, %ymm30 + +// CHECK: vpermilps $123, -516(%rdx){1to8}, %ymm30 +// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0xb2,0xfc,0xfd,0xff,0xff,0x7b] + vpermilps $0x7b, -516(%rdx){1to8}, %ymm30 + +// CHECK: vpermilps %xmm22, %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x22,0x1d,0x00,0x0c,0xe6] + vpermilps %xmm22, %xmm28, %xmm28 + +// CHECK: vpermilps %xmm22, %xmm28, %xmm28 {%k6} +// CHECK: encoding: [0x62,0x22,0x1d,0x06,0x0c,0xe6] + vpermilps %xmm22, %xmm28, %xmm28 {%k6} + +// CHECK: vpermilps %xmm22, %xmm28, %xmm28 {%k6} {z} +// CHECK: encoding: [0x62,0x22,0x1d,0x86,0x0c,0xe6] + vpermilps %xmm22, %xmm28, %xmm28 {%k6} {z} + +// CHECK: vpermilps (%rcx), %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0x21] + vpermilps (%rcx), %xmm28, %xmm28 + +// CHECK: vpermilps 291(%rax,%r14,8), %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x22,0x1d,0x00,0x0c,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpermilps 291(%rax,%r14,8), %xmm28, %xmm28 + +// CHECK: vpermilps (%rcx){1to4}, %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0x21] + vpermilps (%rcx){1to4}, %xmm28, %xmm28 + +// CHECK: vpermilps 2032(%rdx), %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0x62,0x7f] + vpermilps 2032(%rdx), %xmm28, %xmm28 + +// CHECK: vpermilps 2048(%rdx), %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0xa2,0x00,0x08,0x00,0x00] + vpermilps 2048(%rdx), %xmm28, %xmm28 + +// CHECK: vpermilps -2048(%rdx), %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0x62,0x80] + vpermilps -2048(%rdx), %xmm28, %xmm28 + +// CHECK: vpermilps -2064(%rdx), %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0xa2,0xf0,0xf7,0xff,0xff] + vpermilps -2064(%rdx), %xmm28, %xmm28 + +// CHECK: vpermilps 508(%rdx){1to4}, %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0x62,0x7f] + vpermilps 508(%rdx){1to4}, %xmm28, %xmm28 + +// CHECK: vpermilps 512(%rdx){1to4}, %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0xa2,0x00,0x02,0x00,0x00] + vpermilps 512(%rdx){1to4}, %xmm28, %xmm28 + +// CHECK: vpermilps -512(%rdx){1to4}, %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0x62,0x80] + vpermilps -512(%rdx){1to4}, %xmm28, %xmm28 + +// CHECK: vpermilps -516(%rdx){1to4}, %xmm28, %xmm28 +// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0xa2,0xfc,0xfd,0xff,0xff] + vpermilps -516(%rdx){1to4}, %xmm28, %xmm28 + +// CHECK: vpermilps %ymm21, %ymm28, %ymm29 +// CHECK: encoding: [0x62,0x22,0x1d,0x20,0x0c,0xed] + vpermilps %ymm21, %ymm28, %ymm29 + +// CHECK: vpermilps %ymm21, %ymm28, %ymm29 {%k2} +// CHECK: encoding: [0x62,0x22,0x1d,0x22,0x0c,0xed] + vpermilps %ymm21, %ymm28, %ymm29 {%k2} + +// CHECK: vpermilps %ymm21, %ymm28, %ymm29 {%k2} {z} +// CHECK: encoding: [0x62,0x22,0x1d,0xa2,0x0c,0xed] + vpermilps %ymm21, %ymm28, %ymm29 {%k2} {z} + +// CHECK: vpermilps (%rcx), %ymm28, %ymm29 +// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0x29] + vpermilps (%rcx), %ymm28, %ymm29 + +// CHECK: vpermilps 291(%rax,%r14,8), %ymm28, %ymm29 +// CHECK: encoding: [0x62,0x22,0x1d,0x20,0x0c,0xac,0xf0,0x23,0x01,0x00,0x00] + vpermilps 291(%rax,%r14,8), %ymm28, %ymm29 + +// CHECK: vpermilps (%rcx){1to8}, %ymm28, %ymm29 +// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0x29] + vpermilps (%rcx){1to8}, %ymm28, %ymm29 + +// CHECK: vpermilps 4064(%rdx), %ymm28, %ymm29 +// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0x6a,0x7f] + vpermilps 4064(%rdx), %ymm28, %ymm29 + +// CHECK: vpermilps 4096(%rdx), %ymm28, %ymm29 +// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0xaa,0x00,0x10,0x00,0x00] + vpermilps 4096(%rdx), %ymm28, %ymm29 + +// CHECK: vpermilps -4096(%rdx), %ymm28, %ymm29 +// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0x6a,0x80] + vpermilps -4096(%rdx), %ymm28, %ymm29 + +// CHECK: vpermilps -4128(%rdx), %ymm28, %ymm29 +// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0xaa,0xe0,0xef,0xff,0xff] + vpermilps -4128(%rdx), %ymm28, %ymm29 + +// CHECK: vpermilps 508(%rdx){1to8}, %ymm28, %ymm29 +// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0x6a,0x7f] + vpermilps 508(%rdx){1to8}, %ymm28, %ymm29 + +// CHECK: vpermilps 512(%rdx){1to8}, %ymm28, %ymm29 +// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0xaa,0x00,0x02,0x00,0x00] + vpermilps 512(%rdx){1to8}, %ymm28, %ymm29 + +// CHECK: vpermilps -512(%rdx){1to8}, %ymm28, %ymm29 +// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0x6a,0x80] + vpermilps -512(%rdx){1to8}, %ymm28, %ymm29 + +// CHECK: vpermilps -516(%rdx){1to8}, %ymm28, %ymm29 +// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0xaa,0xfc,0xfd,0xff,0xff] + vpermilps -516(%rdx){1to8}, %ymm28, %ymm29 + +// CHECK: vpermilpd $171, %xmm19, %xmm29 +// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x05,0xeb,0xab] + vpermilpd $0xab, %xmm19, %xmm29 + +// CHECK: vpermilpd $171, %xmm19, %xmm29 {%k7} +// CHECK: encoding: [0x62,0x23,0xfd,0x0f,0x05,0xeb,0xab] + vpermilpd $0xab, %xmm19, %xmm29 {%k7} + +// CHECK: vpermilpd $171, %xmm19, %xmm29 {%k7} {z} +// CHECK: encoding: [0x62,0x23,0xfd,0x8f,0x05,0xeb,0xab] + vpermilpd $0xab, %xmm19, %xmm29 {%k7} {z} + +// CHECK: vpermilpd $123, %xmm19, %xmm29 +// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x05,0xeb,0x7b] + vpermilpd $0x7b, %xmm19, %xmm29 + +// CHECK: vpermilpd $123, (%rcx), %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0x29,0x7b] + vpermilpd $0x7b, (%rcx), %xmm29 + +// CHECK: vpermilpd $123, 291(%rax,%r14,8), %xmm29 +// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x05,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpermilpd $0x7b, 291(%rax,%r14,8), %xmm29 + +// CHECK: vpermilpd $123, (%rcx){1to2}, %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0x29,0x7b] + vpermilpd $0x7b, (%rcx){1to2}, %xmm29 + +// CHECK: vpermilpd $123, 2032(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0x6a,0x7f,0x7b] + vpermilpd $0x7b, 2032(%rdx), %xmm29 + +// CHECK: vpermilpd $123, 2048(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0xaa,0x00,0x08,0x00,0x00,0x7b] + vpermilpd $0x7b, 2048(%rdx), %xmm29 + +// CHECK: vpermilpd $123, -2048(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0x6a,0x80,0x7b] + vpermilpd $0x7b, -2048(%rdx), %xmm29 + +// CHECK: vpermilpd $123, -2064(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0xaa,0xf0,0xf7,0xff,0xff,0x7b] + vpermilpd $0x7b, -2064(%rdx), %xmm29 + +// CHECK: vpermilpd $123, 1016(%rdx){1to2}, %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0x6a,0x7f,0x7b] + vpermilpd $0x7b, 1016(%rdx){1to2}, %xmm29 + +// CHECK: vpermilpd $123, 1024(%rdx){1to2}, %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0xaa,0x00,0x04,0x00,0x00,0x7b] + vpermilpd $0x7b, 1024(%rdx){1to2}, %xmm29 + +// CHECK: vpermilpd $123, -1024(%rdx){1to2}, %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0x6a,0x80,0x7b] + vpermilpd $0x7b, -1024(%rdx){1to2}, %xmm29 + +// CHECK: vpermilpd $123, -1032(%rdx){1to2}, %xmm29 +// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0xaa,0xf8,0xfb,0xff,0xff,0x7b] + vpermilpd $0x7b, -1032(%rdx){1to2}, %xmm29 + +// CHECK: vpermilpd $171, %ymm24, %ymm17 +// CHECK: encoding: [0x62,0x83,0xfd,0x28,0x05,0xc8,0xab] + vpermilpd $0xab, %ymm24, %ymm17 + +// CHECK: vpermilpd $171, %ymm24, %ymm17 {%k6} +// CHECK: encoding: [0x62,0x83,0xfd,0x2e,0x05,0xc8,0xab] + vpermilpd $0xab, %ymm24, %ymm17 {%k6} + +// CHECK: vpermilpd $171, %ymm24, %ymm17 {%k6} {z} +// CHECK: encoding: [0x62,0x83,0xfd,0xae,0x05,0xc8,0xab] + vpermilpd $0xab, %ymm24, %ymm17 {%k6} {z} + +// CHECK: vpermilpd $123, %ymm24, %ymm17 +// CHECK: encoding: [0x62,0x83,0xfd,0x28,0x05,0xc8,0x7b] + vpermilpd $0x7b, %ymm24, %ymm17 + +// CHECK: vpermilpd $123, (%rcx), %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x09,0x7b] + vpermilpd $0x7b, (%rcx), %ymm17 + +// CHECK: vpermilpd $123, 291(%rax,%r14,8), %ymm17 +// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x05,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpermilpd $0x7b, 291(%rax,%r14,8), %ymm17 + +// CHECK: vpermilpd $123, (%rcx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x09,0x7b] + vpermilpd $0x7b, (%rcx){1to4}, %ymm17 + +// CHECK: vpermilpd $123, 4064(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x4a,0x7f,0x7b] + vpermilpd $0x7b, 4064(%rdx), %ymm17 + +// CHECK: vpermilpd $123, 4096(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x8a,0x00,0x10,0x00,0x00,0x7b] + vpermilpd $0x7b, 4096(%rdx), %ymm17 + +// CHECK: vpermilpd $123, -4096(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x4a,0x80,0x7b] + vpermilpd $0x7b, -4096(%rdx), %ymm17 + +// CHECK: vpermilpd $123, -4128(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x8a,0xe0,0xef,0xff,0xff,0x7b] + vpermilpd $0x7b, -4128(%rdx), %ymm17 + +// CHECK: vpermilpd $123, 1016(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x4a,0x7f,0x7b] + vpermilpd $0x7b, 1016(%rdx){1to4}, %ymm17 + +// CHECK: vpermilpd $123, 1024(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x8a,0x00,0x04,0x00,0x00,0x7b] + vpermilpd $0x7b, 1024(%rdx){1to4}, %ymm17 + +// CHECK: vpermilpd $123, -1024(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x4a,0x80,0x7b] + vpermilpd $0x7b, -1024(%rdx){1to4}, %ymm17 + +// CHECK: vpermilpd $123, -1032(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x8a,0xf8,0xfb,0xff,0xff,0x7b] + vpermilpd $0x7b, -1032(%rdx){1to4}, %ymm17 + +// CHECK: vpermilpd %xmm17, %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x22,0xa5,0x00,0x0d,0xd1] + vpermilpd %xmm17, %xmm27, %xmm26 + +// CHECK: vpermilpd %xmm17, %xmm27, %xmm26 {%k2} +// CHECK: encoding: [0x62,0x22,0xa5,0x02,0x0d,0xd1] + vpermilpd %xmm17, %xmm27, %xmm26 {%k2} + +// CHECK: vpermilpd %xmm17, %xmm27, %xmm26 {%k2} {z} +// CHECK: encoding: [0x62,0x22,0xa5,0x82,0x0d,0xd1] + vpermilpd %xmm17, %xmm27, %xmm26 {%k2} {z} + +// CHECK: vpermilpd (%rcx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x11] + vpermilpd (%rcx), %xmm27, %xmm26 + +// CHECK: vpermilpd 291(%rax,%r14,8), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x22,0xa5,0x00,0x0d,0x94,0xf0,0x23,0x01,0x00,0x00] + vpermilpd 291(%rax,%r14,8), %xmm27, %xmm26 + +// CHECK: vpermilpd (%rcx){1to2}, %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x11] + vpermilpd (%rcx){1to2}, %xmm27, %xmm26 + +// CHECK: vpermilpd 2032(%rdx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x52,0x7f] + vpermilpd 2032(%rdx), %xmm27, %xmm26 + +// CHECK: vpermilpd 2048(%rdx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x92,0x00,0x08,0x00,0x00] + vpermilpd 2048(%rdx), %xmm27, %xmm26 + +// CHECK: vpermilpd -2048(%rdx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x52,0x80] + vpermilpd -2048(%rdx), %xmm27, %xmm26 + +// CHECK: vpermilpd -2064(%rdx), %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x92,0xf0,0xf7,0xff,0xff] + vpermilpd -2064(%rdx), %xmm27, %xmm26 + +// CHECK: vpermilpd 1016(%rdx){1to2}, %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x52,0x7f] + vpermilpd 1016(%rdx){1to2}, %xmm27, %xmm26 + +// CHECK: vpermilpd 1024(%rdx){1to2}, %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x92,0x00,0x04,0x00,0x00] + vpermilpd 1024(%rdx){1to2}, %xmm27, %xmm26 + +// CHECK: vpermilpd -1024(%rdx){1to2}, %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x52,0x80] + vpermilpd -1024(%rdx){1to2}, %xmm27, %xmm26 + +// CHECK: vpermilpd -1032(%rdx){1to2}, %xmm27, %xmm26 +// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x92,0xf8,0xfb,0xff,0xff] + vpermilpd -1032(%rdx){1to2}, %xmm27, %xmm26 + +// CHECK: vpermilpd %ymm24, %ymm26, %ymm26 +// CHECK: encoding: [0x62,0x02,0xad,0x20,0x0d,0xd0] + vpermilpd %ymm24, %ymm26, %ymm26 + +// CHECK: vpermilpd %ymm24, %ymm26, %ymm26 {%k5} +// CHECK: encoding: [0x62,0x02,0xad,0x25,0x0d,0xd0] + vpermilpd %ymm24, %ymm26, %ymm26 {%k5} + +// CHECK: vpermilpd %ymm24, %ymm26, %ymm26 {%k5} {z} +// CHECK: encoding: [0x62,0x02,0xad,0xa5,0x0d,0xd0] + vpermilpd %ymm24, %ymm26, %ymm26 {%k5} {z} + +// CHECK: vpermilpd (%rcx), %ymm26, %ymm26 +// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x11] + vpermilpd (%rcx), %ymm26, %ymm26 + +// CHECK: vpermilpd 291(%rax,%r14,8), %ymm26, %ymm26 +// CHECK: encoding: [0x62,0x22,0xad,0x20,0x0d,0x94,0xf0,0x23,0x01,0x00,0x00] + vpermilpd 291(%rax,%r14,8), %ymm26, %ymm26 + +// CHECK: vpermilpd (%rcx){1to4}, %ymm26, %ymm26 +// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x11] + vpermilpd (%rcx){1to4}, %ymm26, %ymm26 + +// CHECK: vpermilpd 4064(%rdx), %ymm26, %ymm26 +// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x52,0x7f] + vpermilpd 4064(%rdx), %ymm26, %ymm26 + +// CHECK: vpermilpd 4096(%rdx), %ymm26, %ymm26 +// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x92,0x00,0x10,0x00,0x00] + vpermilpd 4096(%rdx), %ymm26, %ymm26 + +// CHECK: vpermilpd -4096(%rdx), %ymm26, %ymm26 +// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x52,0x80] + vpermilpd -4096(%rdx), %ymm26, %ymm26 + +// CHECK: vpermilpd -4128(%rdx), %ymm26, %ymm26 +// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x92,0xe0,0xef,0xff,0xff] + vpermilpd -4128(%rdx), %ymm26, %ymm26 + +// CHECK: vpermilpd 1016(%rdx){1to4}, %ymm26, %ymm26 +// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x52,0x7f] + vpermilpd 1016(%rdx){1to4}, %ymm26, %ymm26 + +// CHECK: vpermilpd 1024(%rdx){1to4}, %ymm26, %ymm26 +// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x92,0x00,0x04,0x00,0x00] + vpermilpd 1024(%rdx){1to4}, %ymm26, %ymm26 + +// CHECK: vpermilpd -1024(%rdx){1to4}, %ymm26, %ymm26 +// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x52,0x80] + vpermilpd -1024(%rdx){1to4}, %ymm26, %ymm26 + +// CHECK: vpermilpd -1032(%rdx){1to4}, %ymm26, %ymm26 +// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x92,0xf8,0xfb,0xff,0xff] + vpermilpd -1032(%rdx){1to4}, %ymm26, %ymm26 +